From 7f9e19120b344c40d57d719032edad8ee1952267 Mon Sep 17 00:00:00 2001 From: Ryan Lai Date: Wed, 9 Sep 2020 15:59:26 -0700 Subject: [PATCH] Build MicrosoftMLRunner for using WinML nuget and make WinMLRunner only load from System32 (#348) * move some headers around * add nuget * Refactor bindingutilities and outputhelper into .cpp and header files * remove dllload.cpp * Remove windows.ai.machinelearning header * Fix testing * Don't change sln * Fix x86 build * Added different configurations * Add preprocessor definitions to separate between headers and namespaces * Change naming of EXE for nuget builds * Update OutputHelper.cpp remove pragma in outputhelper.cpp * remove pragma from cpp * Switch between native header and update documentation * use const cast * rename nuget to NuGet and address PR comments Co-authored-by: Ryan Lai --- Testing/WinMLRunnerTest/WinMLRunnerTest.cpp | 106 +- .../WinMLRunnerTest/WinMLRunnerTest.vcxproj | 354 +++++ Tools/WinMLRunner/README.md | 7 +- Tools/WinMLRunner/WinMLRunner.sln | 124 +- Tools/WinMLRunner/WinMLRunner.vcxproj | 159 ++- Tools/WinMLRunner/WinMLRunnerDLL.vcxproj | 234 ---- .../WinMLRunner/WinMLRunnerScenarios.vcxproj | 174 ++- .../WinMLRunnerScenarios.vcxproj.filters | 3 + .../WinMLRunner/WinMLRunnerStaticLib.vcxproj | 152 +- .../WinMLRunnerStaticLib.vcxproj.filters | 12 +- .../WinMLRunner/WinMLRunner_Link_DLL.vcxproj | 267 ---- Tools/WinMLRunner/packages.config | 5 + Tools/WinMLRunner/src/BindingUtilities.cpp | 1008 ++++++++++++++ Tools/WinMLRunner/src/BindingUtilities.h | 970 +------------ Tools/WinMLRunner/src/CommandLineArgs.cpp | 29 +- Tools/WinMLRunner/src/CommandLineArgs.h | 3 +- Tools/WinMLRunner/src/Common.h | 6 +- Tools/WinMLRunner/src/Concurrency.cpp | 4 + .../src/LearningModelDeviceHelper.cpp | 6 +- Tools/WinMLRunner/src/OutputHelper.cpp | 1197 ++++++++++++++++ Tools/WinMLRunner/src/OutputHelper.h | 1218 +---------------- Tools/WinMLRunner/src/Run.cpp | 1 + Tools/WinMLRunner/src/Run.h | 2 +- 
Tools/WinMLRunner/src/TimerHelper.h | 2 - Tools/WinMLRunner/src/TypeHelper.h | 4 + 25 files changed, 3223 insertions(+), 2824 deletions(-) delete mode 100644 Tools/WinMLRunner/WinMLRunnerDLL.vcxproj delete mode 100644 Tools/WinMLRunner/WinMLRunner_Link_DLL.vcxproj create mode 100644 Tools/WinMLRunner/packages.config create mode 100644 Tools/WinMLRunner/src/BindingUtilities.cpp create mode 100644 Tools/WinMLRunner/src/OutputHelper.cpp diff --git a/Testing/WinMLRunnerTest/WinMLRunnerTest.cpp b/Testing/WinMLRunnerTest/WinMLRunnerTest.cpp index 7a625fe5..db0b4e89 100644 --- a/Testing/WinMLRunnerTest/WinMLRunnerTest.cpp +++ b/Testing/WinMLRunnerTest/WinMLRunnerTest.cpp @@ -15,7 +15,7 @@ #include using namespace Microsoft::VisualStudio::CppUnitTestFramework; -static HRESULT RunProc(LPWSTR commandLine) +static HRESULT RunProc(wchar_t* commandLine) { STARTUPINFO SI = { 0 }; PROCESS_INFORMATION PI = { 0 }; @@ -273,7 +273,7 @@ namespace WinMLRunnerTest const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx"; const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(3), GetOutputCSVLineCount()); @@ -283,7 +283,7 @@ namespace WinMLRunnerTest const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx"; const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -293,7 +293,7 @@ namespace WinMLRunnerTest const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx"; const std::wstring 
command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -304,7 +304,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU", L"-CPUBoundInput", L"-RGB", L"-CreateDeviceInWinML" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -316,7 +316,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU", L"-CPUBoundInput", L"-BGR", L"-CreateDeviceInWinML" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -328,7 +328,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU", L"-CPUBoundInput", L"-tensor", L"-CreateDeviceInWinML" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -339,7 +339,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU", L"-GPUBoundInput", L"-RGB", L"-CreateDeviceInWinML" }); - 
Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -351,7 +351,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU", L"-GPUBoundInput", L"-BGR", L"-CreateDeviceInWinML" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -363,7 +363,7 @@ namespace WinMLRunnerTest BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU", L"-GPUBoundInput", L"-tensor", L"-CreateDeviceInWinML" }); // Binding GPU Tensor with Session created with CPU device isn't supported. - Assert::AreEqual(E_INVALIDARG, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(E_INVALIDARG, RunProc(const_cast(command.c_str()))); } TEST_METHOD(GarbageInputGpuClientDeviceCpuBoundRGBImage) { @@ -371,7 +371,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU", L"-CPUBoundInput", L"-RGB", L"-CreateDeviceOnClient" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -383,7 +383,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU", L"-CPUBoundInput", L"-RGB", L"-CreateDeviceInWinML" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, 
RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -395,7 +395,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU", L"-CPUBoundInput", L"-BGR", L"-CreateDeviceOnClient" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -407,7 +407,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU", L"-CPUBoundInput", L"-BGR", L"-CreateDeviceInWinML" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -419,7 +419,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU", L"-CPUBoundInput", L"-tensor", L"-CreateDeviceOnClient" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -431,7 +431,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU", L"-CPUBoundInput", L"-tensor", L"-CreateDeviceInWinML" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header 
Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -443,7 +443,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU", L"-GPUBoundInput", L"-RGB", L"-CreateDeviceOnClient" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -455,7 +455,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU", L"-GPUBoundInput", L"-RGB", L"-CreateDeviceInWinML" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -467,7 +467,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU", L"-GPUBoundInput", L"-BGR", L"-CreateDeviceOnClient" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -479,7 +479,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU", L"-GPUBoundInput", L"-BGR", L"-CreateDeviceInWinML" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -491,7 +491,7 @@ namespace WinMLRunnerTest const std::wstring 
command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU", L"-GPUBoundInput", L"-tensor", L"-CreateDeviceOnClient" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -503,7 +503,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU", L"-GPUBoundInput", L"-tensor", L"-CreateDeviceInWinML" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount()); @@ -512,7 +512,7 @@ namespace WinMLRunnerTest TEST_METHOD(RunAllModelsInFolderGarbageInput) { const std::wstring command = BuildCommand({ EXE_PATH, L"-folder", INPUT_FOLDER_PATH, L"-PerfOutput", OUTPUT_PATH, L"-perf" }); - Assert::AreEqual(S_OK, RunProc((wchar_t *)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(5), GetOutputCSVLineCount()); @@ -545,7 +545,7 @@ namespace WinMLRunnerTest const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx"; const std::wstring inputPath = CURRENT_PATH + L"fish.png"; const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-CPU" }); - Assert::AreEqual(S_OK, RunProc((wchar_t *)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); } TEST_METHOD(ProvidedImageInputOnlyGpu) @@ -553,7 +553,7 @@ namespace WinMLRunnerTest const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx"; const std::wstring inputPath = CURRENT_PATH + L"fish.png"; const std::wstring command = 
BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-GPU" }); - Assert::AreEqual(S_OK, RunProc((wchar_t *)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); } TEST_METHOD(ProvidedImageInputFolder) @@ -573,7 +573,7 @@ namespace WinMLRunnerTest system(copyCommand.c_str()); } const std::wstring command = BuildCommand({ EXE_PATH, L"-model", L"SqueezeNet.onnx", L"-InputImageFolder", INPUT_FOLDER_PATH }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); std::string removeCommand = "rd /s /q "; removeCommand += std::string(INPUT_FOLDER_PATH.begin(), INPUT_FOLDER_PATH.end()); @@ -586,7 +586,7 @@ namespace WinMLRunnerTest const std::wstring inputPath = CURRENT_PATH + L"fish_112.png"; const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-autoScale", L"Cubic" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); } TEST_METHOD_WITH_NAME(ProvidedImageInputOnlyCpuPerIterationPerformance) @@ -597,7 +597,7 @@ namespace WinMLRunnerTest BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-SavePerIterationPerf", L"-BaseOutputPath", tensorDataPath, L"-PerIterationPath PerIterationData", L"-CPU" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); // We need to expect one more line because of the header Assert::AreEqual(static_cast(2), GetOutputCSVLineCount(tensorDataPath + L"\\PerIterationData\\Summary.csv")); @@ -609,7 +609,7 @@ namespace WinMLRunnerTest const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME; const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-CPU" 
}); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_CPU.csv", tensorDataPath + L"\\softmaxout_1CpuIteration1.csv")); } @@ -620,7 +620,7 @@ namespace WinMLRunnerTest const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME; const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_GPU.csv", tensorDataPath + L"\\softmaxout_1GpuIteration1.csv")); } @@ -631,7 +631,7 @@ namespace WinMLRunnerTest const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME; const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-CPU" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Mnist_8_input_CPU.csv", tensorDataPath + L"\\Plus214_Output_0CpuIteration1.csv")); } @@ -642,7 +642,7 @@ namespace WinMLRunnerTest const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME; const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Mnist_8_input_GPU.csv", tensorDataPath + L"\\Plus214_Output_0GpuIteration1.csv")); } @@ 
-653,7 +653,7 @@ namespace WinMLRunnerTest const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME; const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-CPU" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensorsFP16(L"OutputTensorData\\Squeezenet_fp16_fish_input_CPU.csv", tensorDataPath + L"\\softmaxout_1CpuIteration1.csv")); } @@ -664,7 +664,7 @@ namespace WinMLRunnerTest const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME; const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensorsFP16(L"OutputTensorData\\Squeezenet_fp16_fish_input_GPU.csv", tensorDataPath + L"\\softmaxout_1GpuIteration1.csv")); } @@ -676,7 +676,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-CPU", L"-Tensor" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_CPU.csv", tensorDataPath + L"\\softmaxout_1CpuIteration1.csv")); } @@ -688,7 +688,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU", L"-Tensor Identity" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + 
Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_GPU.csv", tensorDataPath + L"\\softmaxout_1GpuIteration1.csv")); } @@ -700,7 +700,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-CPU", L"-Tensor Normalize 255 0.485,0.456,0.406 0.229,0.224,0.225" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\DenseNet121_fp32_kitten_224_input_CPU.csv", tensorDataPath + L"\\fc6_1CpuIteration1.csv")); } @@ -712,7 +712,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU", L"-Tensor Normalize 255 0.485,0.456,0.406 0.229,0.224,0.225" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\DenseNet121_fp32_kitten_224_input_GPU.csv", tensorDataPath + L"\\fc6_1GpuIteration1.csv")); } @@ -724,7 +724,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-CPU", L"-Tensor Normalize 255 0.485,0.456,0.406 0.229,0.224,0.225" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensorsFP16(L"OutputTensorData\\DenseNet121_fp16_kitten_224_input_CPU.csv", tensorDataPath + L"\\fc6_1CpuIteration1.csv")); } @@ -736,7 +736,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ 
EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU", L"-Tensor Normalize 255 0.485,0.456,0.406 0.229,0.224,0.225" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensorsFP16(L"OutputTensorData\\DenseNet121_fp16_kitten_224_input_GPU.csv", tensorDataPath + L"\\fc6_1GpuIteration1.csv")); } @@ -762,7 +762,7 @@ namespace WinMLRunnerTest const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx"; const std::wstring inputPath = CURRENT_PATH + L"kitten_224.csv"; const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath }); - Assert::AreEqual(S_OK, RunProc((wchar_t *)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); } TEST_METHOD(ProvidedCSVBadBinding) @@ -770,7 +770,7 @@ namespace WinMLRunnerTest const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx"; const std::wstring inputPath = CURRENT_PATH + L"horizontal-crop.csv"; const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath }); - Assert::AreEqual(HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER), RunProc((wchar_t *)command.c_str())); + Assert::AreEqual(HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER), RunProc(const_cast(command.c_str()))); } TEST_METHOD_WITH_NAME(ProvidedCSVInputGPUSaveCpuBoundTensor) @@ -779,7 +779,7 @@ namespace WinMLRunnerTest const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME; const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_GPU.csv", 
tensorDataPath + L"\\softmaxout_1GpuIteration1.csv")); } @@ -790,7 +790,7 @@ namespace WinMLRunnerTest const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME; const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU", L"-GPUBoundInput" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_GPU.csv", tensorDataPath + L"\\softmaxout_1GpuIteration1.csv")); } @@ -801,7 +801,7 @@ namespace WinMLRunnerTest const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME; const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-CPU" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_CPU.csv", tensorDataPath + L"\\softmaxout_1CpuIteration1.csv")); } @@ -812,7 +812,7 @@ namespace WinMLRunnerTest const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME; const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensorsFP16(L"OutputTensorData\\Squeezenet_fp16_fish_input_GPU.csv", tensorDataPath + L"\\softmaxout_1GpuIteration1.csv")); } @@ -823,7 +823,7 @@ namespace WinMLRunnerTest const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME; const std::wstring command = BuildCommand({ EXE_PATH, L"-model", 
modelPath, L"-input", inputPath, L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-CPU" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); Assert::AreEqual(true, CompareTensorsFP16(L"OutputTensorData\\Squeezenet_fp16_fish_input_CPU.csv", tensorDataPath + L"\\softmaxout_1CpuIteration1.csv")); } @@ -855,7 +855,7 @@ namespace WinMLRunnerTest const std::wstring command = BuildCommand({ EXE_PATH, L"-folder", INPUT_FOLDER_PATH, L"-ConcurrentLoad", L"-NumThreads", L"5" }); - Assert::AreEqual(S_OK, RunProc((wchar_t *)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); } }; @@ -865,19 +865,19 @@ namespace WinMLRunnerTest TEST_METHOD(LoadModelFailModelNotFound) { const std::wstring command = BuildCommand({ EXE_PATH, L"-model", L"invalid_model_name" }); - Assert::AreEqual(HRESULT_FROM_WIN32(E_FAIL), RunProc((wchar_t *)command.c_str())); + Assert::AreEqual(HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND), RunProc(const_cast(command.c_str()))); } TEST_METHOD(TestPrintUsage) { const std::wstring command = BuildCommand({ EXE_PATH }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); } TEST_METHOD(TestTopK) { const std::wstring command = BuildCommand({ EXE_PATH, L"-model", CURRENT_PATH + L"SqueezeNet.onnx", L"-TopK", L"5" }); - Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); } /* Commenting out test until WinMLRunnerDLL.dll is properly written and ABI friendly @@ -891,11 +891,11 @@ namespace WinMLRunnerTest const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx"; const std::wstring dllPath = CURRENT_PATH + L"WinMLRunnerDLL.dll"; const std::wstring command = BuildCommand({ L"WinMLRunner_Link_DLL.exe", L"-model", modelPath }); - Assert::AreEqual(S_OK, RunProc((wchar_t *)command.c_str())); + 
Assert::AreEqual(S_OK, RunProc(const_cast(command.c_str()))); //Rename WinMLRunnerDLL and then run DLL Linked Executable and check if failed rename("WinMLRunnerDLL.dll", "WinMLRunnerDLL_renamed"); - Assert::AreEqual(static_cast(STATUS_DLL_NOT_FOUND), RunProc((wchar_t *)command.c_str())); + Assert::AreEqual(static_cast(STATUS_DLL_NOT_FOUND), RunProc(const_cast(command.c_str()))); //rename back to original naming rename("WinMLRunnerDLL_renamed", "WinMLRunnerDLL.dll"); diff --git a/Testing/WinMLRunnerTest/WinMLRunnerTest.vcxproj b/Testing/WinMLRunnerTest/WinMLRunnerTest.vcxproj index 40de20af..0eb9ef4c 100644 --- a/Testing/WinMLRunnerTest/WinMLRunnerTest.vcxproj +++ b/Testing/WinMLRunnerTest/WinMLRunnerTest.vcxproj @@ -1,10 +1,26 @@ + + Debug_NuGet + Win32 + + + Debug_NuGet + x64 + Debug Win32 + + Release_NuGet + Win32 + + + Release_NuGet + x64 + Release Win32 @@ -34,6 +50,13 @@ Unicode false + + DynamicLibrary + true + v141 + Unicode + false + DynamicLibrary false @@ -42,6 +65,14 @@ Unicode false + + DynamicLibrary + false + v141 + true + Unicode + false + DynamicLibrary true @@ -49,6 +80,13 @@ Unicode false + + DynamicLibrary + true + v141 + Unicode + false + DynamicLibrary false @@ -57,6 +95,14 @@ Unicode false + + DynamicLibrary + false + v141 + true + Unicode + false + @@ -65,30 +111,56 @@ + + + + + + + + + + + + true $(SolutionDir)\x86\$(Configuration)\ + + true + $(SolutionDir)\x86\$(Configuration)\ + true + + true + true $(SolutionDir)\x86\$(Configuration)\ + + true + $(SolutionDir)\x86\$(Configuration)\ + true + + true + NotUsing @@ -105,6 +177,22 @@ $(TargetDir)\WinMLRunnerStaticLib\Filehelper.obj; + + + NotUsing + Level3 + Disabled + $(VCInstallDir)UnitTest\include;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;%(PreprocessorDefinitions) + true + stdcpp17 + + + Windows + $(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories) + $(TargetDir)\WinMLRunnerStaticLib\Filehelper.obj; + + NotUsing @@ -121,6 +209,22 @@ 
$(TargetDir)\WinMLRunnerStaticLib\Filehelper.obj; + + + NotUsing + Level3 + Disabled + $(VCInstallDir)UnitTest\include;%(AdditionalIncludeDirectories) + _DEBUG;%(PreprocessorDefinitions) + true + stdcpp17 + + + Windows + $(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories) + $(TargetDir)\WinMLRunnerStaticLib\Filehelper.obj; + + Level3 @@ -142,6 +246,27 @@ $(TargetDir)\WinMLRunnerStaticLib\Filehelper.obj; + + + Level3 + NotUsing + MaxSpeed + true + true + $(VCInstallDir)UnitTest\include;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;%(PreprocessorDefinitions) + true + MultiThreaded + stdcpp17 + + + Windows + true + true + $(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories) + $(TargetDir)\WinMLRunnerStaticLib\Filehelper.obj; + + Level3 @@ -163,297 +288,526 @@ $(TargetDir)\WinMLRunnerStaticLib\Filehelper.obj; + + + Level3 + NotUsing + MaxSpeed + true + true + $(VCInstallDir)UnitTest\include;%(AdditionalIncludeDirectories) + NDEBUG;%(PreprocessorDefinitions) + true + MultiThreaded + stdcpp17 + + + Windows + true + true + $(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories) + $(TargetDir)\WinMLRunnerStaticLib\Filehelper.obj; + + false + false true + true Document false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true 
PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true 
false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest false + false true + true false + false true + true false + false true + true false + false true + true PreserveNewest diff --git a/Tools/WinMLRunner/README.md b/Tools/WinMLRunner/README.md index 5166a975..1bff24ae 100644 --- a/Tools/WinMLRunner/README.md +++ b/Tools/WinMLRunner/README.md @@ -83,6 +83,9 @@ Run a model on the CPU and GPU separately, and by binding the input to the CPU a Run a model on the CPU with the input bound to the GPU and loaded as an RGB image: > WinMLRunner.exe -model c:\\data\\SqueezeNet.onnx -CPU -GPUBoundInput -RGB +## Using Microsoft.AI.MachineLearning NuGet +WinMLRunner can be built to use WinML's NuGet package, Microsoft.AI.MachineLearning. Simply build with the target configuration "Debug_NuGet" or "Release_NuGet". MicrosoftMLRunner.exe will be created and will use ```Microsoft.AI.MachineLearning.dll``` from the same directory as the executable instead of loading ```Windows.AI.MachineLearning.dll``` from System32. MicrosoftMLRunner is useful for comparing performance with an older version or testing a newer version of WinML's NuGet package. For more information, see the [Microsoft.AI.MachineLearning NuGet page](https://www.nuget.org/packages/Microsoft.AI.MachineLearning). + ## Default output **Running a good model:** @@ -286,10 +289,6 @@ tracerpt.exe winmllog.etl -o logdump.csv -of CSV 2. Windows Performance Analyzer (from Visual Studio) * Launch Windows Performance Analyzer and open the winmllog.etl. -## Dynamic DLL Loading - -If you want to run WinMLRunner with another version of WinML (e.g. comparing the performance with an older version or testing a newer version), simply place the `windows.ai.machinelearning.dll` and `directml.dll` files in the same folder as WinMLRunner.exe.
WinMLRunner will look for for these DLLs first and fall back to `C:/Windows/System32` if it doesn't find them. - ## Known issues - Sequence/Map inputs are not supported yet (the model is just skipped, so it doesn't block other models in a folder); diff --git a/Tools/WinMLRunner/WinMLRunner.sln b/Tools/WinMLRunner/WinMLRunner.sln index b3d17052..f3acc713 100644 --- a/Tools/WinMLRunner/WinMLRunner.sln +++ b/Tools/WinMLRunner/WinMLRunner.sln @@ -17,60 +17,80 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WinMLRunnerScenarios", "Win EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|ARM64 = Debug|ARM64 - Debug|x64 = Debug|x64 - Debug|x86 = Debug|x86 - Release|ARM64 = Release|ARM64 - Release|x64 = Release|x64 - Release|x86 = Release|x86 + Debug_Inbox|x64 = Debug_Inbox|x64 + Debug_Inbox|x86 = Debug_Inbox|x86 + Debug_NuGet|x64 = Debug_NuGet|x64 + Debug_NuGet|x86 = Debug_NuGet|x86 + Release_Inbox|x64 = Release_Inbox|x64 + Release_Inbox|x86 = Release_Inbox|x86 + Release_NuGet|x64 = Release_NuGet|x64 + Release_NuGet|x86 = Release_NuGet|x86 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution - {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug|ARM64.ActiveCfg = Debug|Win32 - {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug|x64.ActiveCfg = Debug|x64 - {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug|x64.Build.0 = Debug|x64 - {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug|x86.ActiveCfg = Debug|Win32 - {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug|x86.Build.0 = Debug|Win32 - {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release|ARM64.ActiveCfg = Release|Win32 - {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release|x64.ActiveCfg = Release|x64 - {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release|x64.Build.0 = Release|x64 - {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release|x86.ActiveCfg = Release|Win32 - {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release|x86.Build.0 = Release|Win32 - {31653A2F-02CC-4A95-9880-BF86965FB262}.Debug|ARM64.ActiveCfg = 
Debug|ARM64 - {31653A2F-02CC-4A95-9880-BF86965FB262}.Debug|ARM64.Build.0 = Debug|ARM64 - {31653A2F-02CC-4A95-9880-BF86965FB262}.Debug|x64.ActiveCfg = Debug|x64 - {31653A2F-02CC-4A95-9880-BF86965FB262}.Debug|x64.Build.0 = Debug|x64 - {31653A2F-02CC-4A95-9880-BF86965FB262}.Debug|x86.ActiveCfg = Debug|Win32 - {31653A2F-02CC-4A95-9880-BF86965FB262}.Debug|x86.Build.0 = Debug|Win32 - {31653A2F-02CC-4A95-9880-BF86965FB262}.Release|ARM64.ActiveCfg = Release|ARM64 - {31653A2F-02CC-4A95-9880-BF86965FB262}.Release|ARM64.Build.0 = Release|ARM64 - {31653A2F-02CC-4A95-9880-BF86965FB262}.Release|x64.ActiveCfg = Release|x64 - {31653A2F-02CC-4A95-9880-BF86965FB262}.Release|x64.Build.0 = Release|x64 - {31653A2F-02CC-4A95-9880-BF86965FB262}.Release|x86.ActiveCfg = Release|Win32 - {31653A2F-02CC-4A95-9880-BF86965FB262}.Release|x86.Build.0 = Release|Win32 - {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug|ARM64.ActiveCfg = Debug|ARM64 - {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug|ARM64.Build.0 = Debug|ARM64 - {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug|x64.ActiveCfg = Debug|x64 - {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug|x64.Build.0 = Debug|x64 - {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug|x86.ActiveCfg = Debug|Win32 - {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug|x86.Build.0 = Debug|Win32 - {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release|ARM64.ActiveCfg = Release|ARM64 - {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release|ARM64.Build.0 = Release|ARM64 - {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release|x64.ActiveCfg = Release|x64 - {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release|x64.Build.0 = Release|x64 - {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release|x86.ActiveCfg = Release|Win32 - {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release|x86.Build.0 = Release|Win32 - {C174D45D-C189-475B-B1A7-494939EE7491}.Debug|ARM64.ActiveCfg = Debug|ARM64 - {C174D45D-C189-475B-B1A7-494939EE7491}.Debug|ARM64.Build.0 = Debug|ARM64 - {C174D45D-C189-475B-B1A7-494939EE7491}.Debug|x64.ActiveCfg = Debug|x64 - 
{C174D45D-C189-475B-B1A7-494939EE7491}.Debug|x64.Build.0 = Debug|x64 - {C174D45D-C189-475B-B1A7-494939EE7491}.Debug|x86.ActiveCfg = Debug|Win32 - {C174D45D-C189-475B-B1A7-494939EE7491}.Debug|x86.Build.0 = Debug|Win32 - {C174D45D-C189-475B-B1A7-494939EE7491}.Release|ARM64.ActiveCfg = Release|ARM64 - {C174D45D-C189-475B-B1A7-494939EE7491}.Release|ARM64.Build.0 = Release|ARM64 - {C174D45D-C189-475B-B1A7-494939EE7491}.Release|x64.ActiveCfg = Release|x64 - {C174D45D-C189-475B-B1A7-494939EE7491}.Release|x64.Build.0 = Release|x64 - {C174D45D-C189-475B-B1A7-494939EE7491}.Release|x86.ActiveCfg = Release|Win32 - {C174D45D-C189-475B-B1A7-494939EE7491}.Release|x86.Build.0 = Release|Win32 + {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug_Inbox|x64.ActiveCfg = Debug|x64 + {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug_Inbox|x64.Build.0 = Debug|x64 + {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug_Inbox|x86.ActiveCfg = Debug|Win32 + {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug_Inbox|x86.Build.0 = Debug|Win32 + {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug_NuGet|x64.ActiveCfg = Debug_NuGet|x64 + {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug_NuGet|x64.Build.0 = Debug_NuGet|x64 + {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug_NuGet|x86.ActiveCfg = Debug_NuGet|Win32 + {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug_NuGet|x86.Build.0 = Debug_NuGet|Win32 + {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release_Inbox|x64.ActiveCfg = Release|x64 + {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release_Inbox|x64.Build.0 = Release|x64 + {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release_Inbox|x86.ActiveCfg = Release|Win32 + {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release_Inbox|x86.Build.0 = Release|Win32 + {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release_NuGet|x64.ActiveCfg = Release_NuGet|x64 + {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release_NuGet|x64.Build.0 = Release_NuGet|x64 + {E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release_NuGet|x86.ActiveCfg = Release_NuGet|Win32 + 
{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release_NuGet|x86.Build.0 = Release_NuGet|Win32 + {31653A2F-02CC-4A95-9880-BF86965FB262}.Debug_Inbox|x64.ActiveCfg = Debug|x64 + {31653A2F-02CC-4A95-9880-BF86965FB262}.Debug_Inbox|x64.Build.0 = Debug|x64 + {31653A2F-02CC-4A95-9880-BF86965FB262}.Debug_Inbox|x86.ActiveCfg = Debug|Win32 + {31653A2F-02CC-4A95-9880-BF86965FB262}.Debug_Inbox|x86.Build.0 = Debug|Win32 + {31653A2F-02CC-4A95-9880-BF86965FB262}.Debug_NuGet|x64.ActiveCfg = Debug_NuGet|x64 + {31653A2F-02CC-4A95-9880-BF86965FB262}.Debug_NuGet|x64.Build.0 = Debug_NuGet|x64 + {31653A2F-02CC-4A95-9880-BF86965FB262}.Debug_NuGet|x86.ActiveCfg = Debug_NuGet|Win32 + {31653A2F-02CC-4A95-9880-BF86965FB262}.Debug_NuGet|x86.Build.0 = Debug_NuGet|Win32 + {31653A2F-02CC-4A95-9880-BF86965FB262}.Release_Inbox|x64.ActiveCfg = Release|x64 + {31653A2F-02CC-4A95-9880-BF86965FB262}.Release_Inbox|x64.Build.0 = Release|x64 + {31653A2F-02CC-4A95-9880-BF86965FB262}.Release_Inbox|x86.ActiveCfg = Release|Win32 + {31653A2F-02CC-4A95-9880-BF86965FB262}.Release_Inbox|x86.Build.0 = Release|Win32 + {31653A2F-02CC-4A95-9880-BF86965FB262}.Release_NuGet|x64.ActiveCfg = Release_NuGet|x64 + {31653A2F-02CC-4A95-9880-BF86965FB262}.Release_NuGet|x64.Build.0 = Release_NuGet|x64 + {31653A2F-02CC-4A95-9880-BF86965FB262}.Release_NuGet|x86.ActiveCfg = Release_NuGet|Win32 + {31653A2F-02CC-4A95-9880-BF86965FB262}.Release_NuGet|x86.Build.0 = Release_NuGet|Win32 + {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug_Inbox|x64.ActiveCfg = Debug|x64 + {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug_Inbox|x64.Build.0 = Debug|x64 + {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug_Inbox|x86.ActiveCfg = Debug|Win32 + {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug_Inbox|x86.Build.0 = Debug|Win32 + {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug_NuGet|x64.ActiveCfg = Debug_NuGet|x64 + {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug_NuGet|x64.Build.0 = Debug_NuGet|x64 + {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug_NuGet|x86.ActiveCfg = 
Debug_NuGet|Win32 + {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug_NuGet|x86.Build.0 = Debug_NuGet|Win32 + {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release_Inbox|x64.ActiveCfg = Release|x64 + {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release_Inbox|x64.Build.0 = Release|x64 + {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release_Inbox|x86.ActiveCfg = Release|Win32 + {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release_Inbox|x86.Build.0 = Release|Win32 + {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release_NuGet|x64.ActiveCfg = Release_NuGet|x64 + {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release_NuGet|x64.Build.0 = Release_NuGet|x64 + {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release_NuGet|x86.ActiveCfg = Release_NuGet|Win32 + {C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release_NuGet|x86.Build.0 = Release_NuGet|Win32 + {C174D45D-C189-475B-B1A7-494939EE7491}.Debug_Inbox|x64.ActiveCfg = Debug|x64 + {C174D45D-C189-475B-B1A7-494939EE7491}.Debug_Inbox|x64.Build.0 = Debug|x64 + {C174D45D-C189-475B-B1A7-494939EE7491}.Debug_Inbox|x86.ActiveCfg = Debug|Win32 + {C174D45D-C189-475B-B1A7-494939EE7491}.Debug_Inbox|x86.Build.0 = Debug|Win32 + {C174D45D-C189-475B-B1A7-494939EE7491}.Debug_NuGet|x64.ActiveCfg = Debug_NuGet|x64 + {C174D45D-C189-475B-B1A7-494939EE7491}.Debug_NuGet|x64.Build.0 = Debug_NuGet|x64 + {C174D45D-C189-475B-B1A7-494939EE7491}.Debug_NuGet|x86.ActiveCfg = Debug_NuGet|Win32 + {C174D45D-C189-475B-B1A7-494939EE7491}.Debug_NuGet|x86.Build.0 = Debug_NuGet|Win32 + {C174D45D-C189-475B-B1A7-494939EE7491}.Release_Inbox|x64.ActiveCfg = Release|x64 + {C174D45D-C189-475B-B1A7-494939EE7491}.Release_Inbox|x64.Build.0 = Release|x64 + {C174D45D-C189-475B-B1A7-494939EE7491}.Release_Inbox|x86.ActiveCfg = Release|Win32 + {C174D45D-C189-475B-B1A7-494939EE7491}.Release_Inbox|x86.Build.0 = Release|Win32 + {C174D45D-C189-475B-B1A7-494939EE7491}.Release_NuGet|x64.ActiveCfg = Release_NuGet|x64 + {C174D45D-C189-475B-B1A7-494939EE7491}.Release_NuGet|x64.Build.0 = Release_NuGet|x64 + 
{C174D45D-C189-475B-B1A7-494939EE7491}.Release_NuGet|x86.ActiveCfg = Release_NuGet|Win32 + {C174D45D-C189-475B-B1A7-494939EE7491}.Release_NuGet|x86.Build.0 = Release_NuGet|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/Tools/WinMLRunner/WinMLRunner.vcxproj b/Tools/WinMLRunner/WinMLRunner.vcxproj index 773260ec..b1d43b71 100644 --- a/Tools/WinMLRunner/WinMLRunner.vcxproj +++ b/Tools/WinMLRunner/WinMLRunner.vcxproj @@ -1,17 +1,27 @@ + + - - Debug - ARM64 + + Debug_NuGet + Win32 + + + Debug_NuGet + x64 Debug Win32 - - Release - ARM64 + + Release_NuGet + Win32 + + + Release_NuGet + x64 Release @@ -29,6 +39,9 @@ + + + {c174d45d-c189-475b-b1a7-494939ee7491} @@ -49,7 +62,6 @@ Win32Proj WinMLRunner 10.0.18362.0 - true @@ -58,7 +70,7 @@ v141 Unicode - + Application true v141 @@ -71,7 +83,7 @@ true Unicode - + Application false v141 @@ -84,6 +96,12 @@ v141 Unicode + + Application + true + v141 + Unicode + Application false @@ -91,6 +109,13 @@ true Unicode + + Application + false + v141 + true + Unicode + @@ -99,48 +124,66 @@ - + - + + + + + + + true $(Platform)\$(Configuration)\$(ProjectName)\ + + true + $(Platform)\$(Configuration)\$(ProjectName)\ + MicrosoftMLRunner + true $(SolutionDir)\x86\$(Configuration)\ x86\$(Configuration)\$(ProjectName)\ - + true - $(Platform)\$(Configuration)\$(ProjectName)\ + $(SolutionDir)\x86\$(Configuration)\ + x86\$(Configuration)\$(ProjectName)\ false $(SolutionDir)\x86\$(Configuration)\ x86\$(Configuration)\$(ProjectName)\ - + false - $(Platform)\$(Configuration)\$(ProjectName)\ + $(SolutionDir)\x86\$(Configuration)\ + x86\$(Configuration)\$(ProjectName)\ false $(Platform)\$(Configuration)\$(ProjectName)\ + + false + $(Platform)\$(Configuration)\$(ProjectName)\ + MicrosoftMLRunner + NotUsing @@ -169,6 +212,35 @@ $(IntDir) + + + NotUsing + Level3 + Disabled + true + USE_WINML_NUGET;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + + + + + stdcpp17 + + + Console + true + 
WindowsApp.lib; mincore.lib; DXGI.lib + "ext-ms-win-dxcore-l1-1-0.dll"; dxgi.dll; d3d11.dll + + + $(ProjectDir)src\GenerateVersionStrings.cmd $(ProjectDir) $(IntDir) + + + $(IntDir) + + NotUsing @@ -195,13 +267,13 @@ $(ProjectDir)\src\GenerateVersionStrings.cmd $(ProjectDir) $(IntDir) - + NotUsing Level3 Disabled true - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + USE_WINML_NUGET;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true @@ -213,7 +285,7 @@ Console true WindowsApp.lib; mincore.lib; DXGI.lib - "ext-ms-win-dxcore-l1-1-0.dll" + "ext-ms-win-dxcore-l1-1-0.dll"; dxgi.dll; d3d11.dll $(IntDir) @@ -253,7 +325,7 @@ $(ProjectDir)\src\GenerateVersionStrings.cmd $(ProjectDir) $(IntDir) - + NotUsing Level3 @@ -261,7 +333,7 @@ true true true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + USE_WINML_NUGET;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true @@ -276,7 +348,7 @@ true true WindowsApp.lib; mincore.lib; DXGI.lib - "ext-ms-win-dxcore-l1-1-0.dll" + "ext-ms-win-dxcore-l1-1-0.dll"; dxgi.dll; d3d11.dll $(IntDir) @@ -318,7 +390,52 @@ $(IntDir) + + + NotUsing + Level3 + MaxSpeed + true + true + true + USE_WINML_NUGET;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + + + + + stdcpp17 + MultiThreaded + + + Console + true + true + true + WindowsApp.lib; mincore.lib; DXGI.lib + "ext-ms-win-dxcore-l1-1-0.dll"; dxgi.dll; d3d11.dll + + + $(ProjectDir)src\GenerateVersionStrings.cmd $(ProjectDir) $(IntDir) + + + $(IntDir) + + + + + + + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
+ + + + + + \ No newline at end of file diff --git a/Tools/WinMLRunner/WinMLRunnerDLL.vcxproj b/Tools/WinMLRunner/WinMLRunnerDLL.vcxproj deleted file mode 100644 index 413f12f3..00000000 --- a/Tools/WinMLRunner/WinMLRunnerDLL.vcxproj +++ /dev/null @@ -1,234 +0,0 @@ - - - - - Debug - ARM64 - - - Debug - Win32 - - - Release - ARM64 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - - {c3bcbea1-90e6-426f-88ac-64c274bcef45} - true - true - - - - - - - true - 15.0 - {81EA9CC6-8A26-4583-B1A4-84740EF815C8} - Benchmark - 10.0.17763.0 - WinMLRunnerDLL - true - - - - DynamicLibrary - true - v141 - Unicode - - - DynamicLibrary - false - v141 - true - Unicode - - - DynamicLibrary - true - v141 - Unicode - - - DynamicLibrary - true - v141 - Unicode - - - DynamicLibrary - false - v141 - true - Unicode - - - DynamicLibrary - false - v141 - true - Unicode - - - - - - - - - - - - - - - - - - - - - - - - - - - $(Platform)\$(Configuration)\$(ProjectName)\ - $(ProjectName) - - - $(Platform)\$(Configuration)\$(ProjectName)\ - $(ProjectName) - - - $(SolutionDir)\x86\$(Configuration)\ - x86\$(Configuration)\$(ProjectName)\ - $(ProjectName) - - - $(SolutionDir)\x86\$(Configuration)\ - x86\$(Configuration)\$(ProjectName)\ - $(ProjectName) - - - $(ProjectName) - $(Platform)\$(Configuration)\$(ProjectName)\ - - - $(ProjectName) - $(Platform)\$(Configuration)\$(ProjectName)\ - - - - Level3 - Disabled - true - true - stdcpp17 - - - Console - WindowsApp.lib;$(TargetDir)\WinMLRunnerStaticLib.lib; - - - - - Level3 - Disabled - true - stdcpp17 - true - - - Console - WindowsApp.lib;$(TargetDir)\WinMLRunnerStaticLib.lib; - - - - - Level3 - Disabled - true - stdcpp17 - true - - - Console - WindowsApp.lib;$(TargetDir)\WinMLRunnerStaticLib.lib; - - - - - Level3 - MaxSpeed - true - true - true - true - stdcpp17 - MultiThreaded - - - true - true - Console - WindowsApp.lib;$(TargetDir)\WinMLRunnerStaticLib.lib; - - - - - Level3 - MaxSpeed - true - true - true - true - stdcpp17 - 
MultiThreaded - - - true - true - Console - WindowsApp.lib;$(TargetDir)\WinMLRunnerStaticLib.lib; - - - - - Level3 - MaxSpeed - true - true - true - true - stdcpp17 - MultiThreaded - - - true - true - Console - WindowsApp.lib;$(TargetDir)\WinMLRunnerStaticLib.lib; - - - - - - \ No newline at end of file diff --git a/Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj b/Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj index e67b4674..c574b409 100644 --- a/Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj +++ b/Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj @@ -1,10 +1,28 @@ + + + + Debug_NuGet + Win32 + + + Debug_NuGet + x64 + Debug Win32 + + Release_NuGet + Win32 + + + Release_NuGet + x64 + Release Win32 @@ -26,6 +44,9 @@ + + + 15.0 {C174D45D-C189-475B-B1A7-494939EE7491} @@ -40,6 +61,12 @@ v141 Unicode + + StaticLibrary + true + v141 + Unicode + StaticLibrary false @@ -47,12 +74,25 @@ true Unicode + + StaticLibrary + false + v141 + true + Unicode + StaticLibrary true v141 Unicode + + StaticLibrary + true + v141 + Unicode + StaticLibrary false @@ -60,6 +100,13 @@ true Unicode + + StaticLibrary + false + v141 + true + Unicode + @@ -68,32 +115,64 @@ + + + + + + + + + + + + true $(Platform)\$(Configuration)\$(ProjectName)\ - + true $(Platform)\$(Configuration)\$(ProjectName)\ + + true + x86\$(Configuration)\$(ProjectName)\ + $(SolutionDir)x86\$(Configuration)\ + + + true + x86\$(Configuration)\$(ProjectName)\ + $(SolutionDir)x86\$(Configuration)\ + - $(Platform)\$(Configuration)\$(ProjectName)\ + x86\$(Configuration)\$(ProjectName)\ false + $(SolutionDir)x86\$(Configuration)\ + + + x86\$(Configuration)\$(ProjectName)\ + false + $(SolutionDir)x86\$(Configuration)\ $(Platform)\$(Configuration)\$(ProjectName)\ false + + $(Platform)\$(Configuration)\$(ProjectName)\ + false + NotUsing @@ -109,6 +188,21 @@ true + + + NotUsing + Level3 + Disabled + true + USE_WINML_NUGET;_DEBUG;_LIB;%(PreprocessorDefinitions) + true + stdcpp17 + + + Windows + true + + NotUsing @@ -127,6 +221,26 @@ true 
+ + + NotUsing + Level3 + Disabled + true + USE_WINML_NUGET;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) + true + E:\winml\Windows-Machine-Learning\Tools\WinMLRunner\src;%(AdditionalIncludeDirectories) + stdcpp17 + + + + + + + Windows + true + + NotUsing @@ -150,6 +264,31 @@ true + + + NotUsing + Level3 + MaxSpeed + true + true + true + USE_WINML_NUGET;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) + true + E:\winml\Windows-Machine-Learning\Tools\WinMLRunner\src;%(AdditionalIncludeDirectories) + stdcpp17 + + + + + MultiThreaded + + + Windows + true + true + true + + NotUsing @@ -170,7 +309,38 @@ true + + + NotUsing + Level3 + MaxSpeed + true + true + true + USE_WINML_NUGET;NDEBUG;_LIB;%(PreprocessorDefinitions) + true + stdcpp17 + MultiThreaded + + + Windows + true + true + true + + + + + + + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
+ + + + + + \ No newline at end of file diff --git a/Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj.filters b/Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj.filters index cfef2a87..027f252a 100644 --- a/Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj.filters +++ b/Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj.filters @@ -30,4 +30,7 @@ Source Files + + + \ No newline at end of file diff --git a/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj b/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj index 9fecd048..aa645a4d 100644 --- a/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj +++ b/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj @@ -1,17 +1,27 @@ + + - - Debug - ARM64 + + Debug_NuGet + Win32 + + + Debug_NuGet + x64 Debug Win32 - - Release - ARM64 + + Release_NuGet + Win32 + + + Release_NuGet + x64 Release @@ -39,10 +49,14 @@ - + + + + + 15.0 @@ -50,7 +64,6 @@ Win32Proj WinMLRunnerStaticLib 10.0.18362.0 - true @@ -59,7 +72,7 @@ v141 Unicode - + StaticLibrary true v141 @@ -72,7 +85,7 @@ true Unicode - + StaticLibrary false v141 @@ -85,6 +98,12 @@ v141 Unicode + + StaticLibrary + true + v141 + Unicode + StaticLibrary false @@ -92,6 +111,13 @@ true Unicode + + StaticLibrary + false + v141 + true + Unicode + @@ -100,48 +126,64 @@ - + - + + + + + + + false $(SolutionDir)x86\$(Configuration)\ x86\$(Configuration)\$(ProjectName)\ - + false - $(Platform)\$(Configuration)\$(ProjectName)\ + $(SolutionDir)x86\$(Configuration)\ + x86\$(Configuration)\$(ProjectName)\ true $(SolutionDir)x86\$(Configuration)\ x86\$(Configuration)\$(ProjectName)\ - + true - $(Platform)\$(Configuration)\$(ProjectName)\ + $(SolutionDir)x86\$(Configuration)\ + x86\$(Configuration)\$(ProjectName)\ true $(Platform)\$(Configuration)\$(ProjectName)\ + + true + $(Platform)\$(Configuration)\$(ProjectName)\ + false $(Platform)\$(Configuration)\$(ProjectName)\ + + false + $(Platform)\$(Configuration)\$(ProjectName)\ + NotUsing @@ -156,8 +198,7 @@ stdcpp17 MultiThreaded - - + %(AdditionalIncludeDirectories) Windows 
@@ -166,7 +207,7 @@ true - + NotUsing Level3 @@ -174,7 +215,7 @@ true true true - WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) + USE_WINML_NUGET;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) true @@ -182,6 +223,7 @@ stdcpp17 MultiThreaded + %(AdditionalIncludeDirectories) Windows @@ -201,27 +243,27 @@ stdcpp17 - - + %(AdditionalIncludeDirectories) Windows true - + NotUsing Level3 Disabled true - WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) + USE_WINML_NUGET;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) true stdcpp17 + %(AdditionalIncludeDirectories) Windows @@ -239,8 +281,27 @@ stdcpp17 - - + %(AdditionalIncludeDirectories) + + + Windows + true + + + + + NotUsing + Level3 + Disabled + true + USE_WINML_NUGET;_DEBUG;_LIB;%(PreprocessorDefinitions) + true + stdcpp17 + + + + + %(AdditionalIncludeDirectories) Windows @@ -261,8 +322,32 @@ MultiThreaded - - + %(AdditionalIncludeDirectories) + + + Windows + true + true + true + + + + + NotUsing + Level3 + MaxSpeed + true + true + true + USE_WINML_NUGET;NDEBUG;_LIB;%(PreprocessorDefinitions) + true + stdcpp17 + + + + + MultiThreaded + %(AdditionalIncludeDirectories) Windows @@ -273,5 +358,16 @@ + + + + + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
+ + + + + + \ No newline at end of file diff --git a/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj.filters b/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj.filters index ec0b0cce..dd01f055 100644 --- a/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj.filters +++ b/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj.filters @@ -4,9 +4,6 @@ Source Files - - Source Files - Source Files @@ -16,6 +13,12 @@ Source Files + + Source Files + + + Source Files + @@ -54,4 +57,7 @@ {50101483-1f70-4657-bdeb-edbb6b1d0aaf} + + + \ No newline at end of file diff --git a/Tools/WinMLRunner/WinMLRunner_Link_DLL.vcxproj b/Tools/WinMLRunner/WinMLRunner_Link_DLL.vcxproj deleted file mode 100644 index d25fa157..00000000 --- a/Tools/WinMLRunner/WinMLRunner_Link_DLL.vcxproj +++ /dev/null @@ -1,267 +0,0 @@ - - - - - Debug - ARM64 - - - Debug - Win32 - - - Release - ARM64 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - - - - - {81ea9cc6-8a26-4583-b1a4-84740ef815c8} - - - - 15.0 - {A1DFBB85-290F-4D1C-8699-0100DB6373F2} - Win32Proj - WinMLRunnerLinkDLL - 10.0.17763.0 - - - - Application - true - v141 - Unicode - - - Application - true - v141 - Unicode - - - Application - false - v141 - true - Unicode - - - Application - false - v141 - true - Unicode - - - Application - true - v141 - Unicode - - - Application - false - v141 - true - Unicode - - - - - - - - - - - - - - - - - - - - - - - - - - - true - - - true - $(SolutionDir)\x86\$(Configuration)\ - x86\$(Configuration)\ - - - true - - - false - $(SolutionDir)\x86\$(Configuration)\ - x86\$(Configuration)\ - - - false - - - false - - - - NotUsing - Level3 - Disabled - true - _DEBUG;_CONSOLE;%(PreprocessorDefinitions) - true - - - - stdcpp17 - - - Console - true - - - - - - - NotUsing - Level3 - Disabled - true - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - true - - - - stdcpp17 - - - Console - true - - - - - - - NotUsing - Level3 - Disabled - true - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - true - - - - - stdcpp17 - - 
- Console - true - - - - - - - NotUsing - Level3 - MaxSpeed - true - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - true - - - - stdcpp17 - - - Console - true - true - true - - - - - - - NotUsing - Level3 - MaxSpeed - true - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - true - - - - - stdcpp17 - - - Console - true - true - true - - - - - - - NotUsing - Level3 - MaxSpeed - true - true - true - NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - true - - - - stdcpp17 - - - Console - true - true - true - - - - - - - - \ No newline at end of file diff --git a/Tools/WinMLRunner/packages.config b/Tools/WinMLRunner/packages.config new file mode 100644 index 00000000..839ccacd --- /dev/null +++ b/Tools/WinMLRunner/packages.config @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/Tools/WinMLRunner/src/BindingUtilities.cpp b/Tools/WinMLRunner/src/BindingUtilities.cpp new file mode 100644 index 00000000..f1e7cdb2 --- /dev/null +++ b/Tools/WinMLRunner/src/BindingUtilities.cpp @@ -0,0 +1,1008 @@ +#include "d3dx12.h" +#include +#include +#ifdef USE_WINML_NUGET +#include "Microsoft.AI.Machinelearning.Native.h" +#else +#include "Windows.AI.Machinelearning.Native.h" +#endif +#include "MemoryBuffer.h" +#include "TypeHelper.h" +#include "CommandLineArgs.h" +#include "OutputHelper.h" +#include "BindingUtilities.h" +using namespace winrt::Windows::Media; +using namespace winrt::Windows::Storage; +using namespace winrt::Windows::Storage::Streams; +#ifdef USE_WINML_NUGET +using namespace winrt::Microsoft::AI::MachineLearning; +#else +using namespace winrt::Windows::AI::MachineLearning; +#endif +using namespace winrt::Windows::Foundation::Collections; +using namespace winrt::Windows::Graphics::DirectX; +using namespace winrt::Windows::Graphics::Imaging; +using namespace winrt::Windows::Graphics::DirectX::Direct3D11; +using namespace DirectX::PackedVector; + +inline size_t hash_data(void const* ptr, size_t const bytes) noexcept +{ +#ifdef 
_WIN64 + constexpr size_t fnv_offset_basis = 14695981039346656037ULL; + constexpr size_t fnv_prime = 1099511628211ULL; +#else + constexpr size_t fnv_offset_basis = 2166136261U; + constexpr size_t fnv_prime = 16777619U; +#endif + size_t result = fnv_offset_basis; + uint8_t const* const buffer = static_cast(ptr); + + for (size_t next = 0; next < bytes; ++next) + { + result ^= buffer[next]; + result *= fnv_prime; + } + + return result; +} + +template struct TensorKindToPointerType +{ + static_assert(true, "No TensorKind mapped for given type!"); +}; +template <> struct TensorKindToPointerType +{ + typedef uint8_t Type; +}; +template <> struct TensorKindToPointerType +{ + typedef int8_t Type; +}; +template <> struct TensorKindToPointerType +{ + typedef uint16_t Type; +}; +template <> struct TensorKindToPointerType +{ + typedef int16_t Type; +}; +template <> struct TensorKindToPointerType +{ + typedef uint32_t Type; +}; +template <> struct TensorKindToPointerType +{ + typedef int32_t Type; +}; +template <> struct TensorKindToPointerType +{ + typedef uint64_t Type; +}; +template <> struct TensorKindToPointerType +{ + typedef int64_t Type; +}; +template <> struct TensorKindToPointerType +{ + typedef boolean Type; +}; +template <> struct TensorKindToPointerType +{ + typedef double Type; +}; +template <> struct TensorKindToPointerType +{ + typedef float Type; +}; +template <> struct TensorKindToPointerType +{ + typedef HALF Type; +}; +template <> struct TensorKindToPointerType +{ + typedef winrt::hstring Type; +}; + +template struct TensorKindToValue +{ + static_assert(true, "No TensorKind mapped for given type!"); +}; +template <> struct TensorKindToValue +{ + typedef TensorUInt8Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorInt8Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorUInt16Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorInt16Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef 
TensorUInt32Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorInt32Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorUInt64Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorInt64Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorBoolean Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorDouble Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorFloat Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorFloat16Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorString Type; +}; + +template ToType ConvertToPointerType(FromType value) +{ + static_assert(true, "No TensorKind mapped for given type!"); + return 0; +}; +template <> uint8_t ConvertToPointerType(float value) { return static_cast(value); }; +template <> int8_t ConvertToPointerType(float value) { return static_cast(value); }; +template <> uint16_t ConvertToPointerType(float value) { return static_cast(value); }; +template <> int16_t ConvertToPointerType(float value) { return static_cast(value); }; +template <> uint32_t ConvertToPointerType(float value) { return static_cast(value); }; +template <> int32_t ConvertToPointerType(float value) { return static_cast(value); }; +template <> uint64_t ConvertToPointerType(float value) { return static_cast(value); }; +template <> int64_t ConvertToPointerType(float value) { return static_cast(value); }; +template <> boolean ConvertToPointerType(float value) { return static_cast(value); }; +template <> double ConvertToPointerType(double value) { return static_cast(value); }; +template <> float ConvertToPointerType(float value) { return static_cast(value); }; +template <> HALF ConvertToPointerType(float value) { return XMConvertFloatToHalf(value); }; +template <> winrt::hstring ConvertToPointerType(winrt::hstring value) +{ + return static_cast(value); +}; + +ColorManagementMode GetColorManagementMode(const LearningModel& model) + 
+{ + // Get model color space gamma + hstring gammaSpace = L""; + try + { + gammaSpace = model.Metadata().Lookup(L"Image.ColorSpaceGamma"); + } + catch (...) + { + printf(" Model does not have color space gamma information. Will color manage to sRGB by default...\n"); + } + if (gammaSpace == L"" || _wcsicmp(gammaSpace.c_str(), L"SRGB") == 0) + { + return ColorManagementMode::ColorManageToSRgb; + } + // Due diligence should be done to make sure that the input image is within the model's colorspace. There are + // multiple non-sRGB color spaces. + printf(" Model metadata indicates that color gamma space is : %ws. Will not manage color space to sRGB...\n", + gammaSpace.c_str()); + return ColorManagementMode::DoNotColorManage; +} + +void GetHeightAndWidthFromLearningModelFeatureDescriptor(const ILearningModelFeatureDescriptor& modelFeatureDescriptor, + uint64_t& width, uint64_t& height) +{ + if (modelFeatureDescriptor.Kind() == LearningModelFeatureKind::Tensor) + { + // We assume NCHW + auto tensorDescriptor = modelFeatureDescriptor.try_as(); + if (tensorDescriptor.Shape().Size() != 4) + { + throw hresult_invalid_argument(L"Cannot generate arbitrary image for tensor input of dimensions: " + + tensorDescriptor.Shape().Size()); + } + height = tensorDescriptor.Shape().GetAt(2); + width = tensorDescriptor.Shape().GetAt(3); + } + else if (modelFeatureDescriptor.Kind() == LearningModelFeatureKind::Image) + { + auto imageDescriptor = modelFeatureDescriptor.try_as(); + height = imageDescriptor.Height(); + width = imageDescriptor.Width(); + } + else + { + throw hresult_not_implemented( + L"Generating arbitrary image not supported for input types that aren't tensor or image."); + } +} + +namespace BindingUtilities +{ + static unsigned int seed = 0; + static std::independent_bits_engine randomBitsEngineChar; + + SoftwareBitmap GenerateGarbageImage(const ILearningModelFeatureDescriptor& modelFeatureDescriptor, + InputDataType inputDataType) + { + assert(inputDataType != 
InputDataType::Tensor); + uint64_t width = 0; + uint64_t height = 0; + GetHeightAndWidthFromLearningModelFeatureDescriptor(modelFeatureDescriptor, width, height); + + // We have to create RGBA8 or BGRA8 images, so we need 4 channels + uint32_t totalByteSize = static_cast(width) * static_cast(height) * 4; + + // Generate values for the image based on a seed + std::vector data(totalByteSize); + randomBitsEngineChar.seed(seed++); + std::generate(data.begin(), data.end(), randomBitsEngineChar); + + // Write the values to a buffer + winrt::array_view dataView(data); + InMemoryRandomAccessStream dataStream; + DataWriter dataWriter(dataStream); + dataWriter.WriteBytes(dataView); + IBuffer buffer = dataWriter.DetachBuffer(); + + // Create the software bitmap + return SoftwareBitmap::CreateCopyFromBuffer(buffer, TypeHelper::GetBitmapPixelFormat(inputDataType), + static_cast(width), static_cast(height)); + } + + SoftwareBitmap LoadImageFile(const ILearningModelFeatureDescriptor& modelFeatureDescriptor, + const InputDataType inputDataType, const hstring& filePath, + const CommandLineArgs& args, uint32_t iterationNum, + ColorManagementMode colorManagementMode) + { + // We assume NCHW and NCDHW + uint64_t width = 0; + uint64_t height = 0; + GetHeightAndWidthFromLearningModelFeatureDescriptor(modelFeatureDescriptor, width, height); + IRandomAccessStream stream; + BitmapDecoder decoder = NULL; + try + { + // open the file + StorageFile file = StorageFile::GetFileFromPathAsync(filePath).get(); + // get a stream on it + stream = file.OpenAsync(FileAccessMode::Read).get(); + // Create the decoder from the stream + decoder = BitmapDecoder::CreateAsync(stream).get(); + } + catch (hresult_error hr) + { + printf(" Failed to load the image file, make sure you are using fully qualified paths\r\n"); + printf(" %ws\n", hr.message().c_str()); + exit(hr.code()); + } + BitmapPixelFormat format = inputDataType == InputDataType::Tensor + ? 
decoder.BitmapPixelFormat() + : TypeHelper::GetBitmapPixelFormat(inputDataType); + try + { + // If input dimensions are different from tensor input, then scale / crop while reading + if (args.IsAutoScale() && (decoder.PixelHeight() != height || decoder.PixelWidth() != width)) + { + if (!args.TerseOutput() || iterationNum == 0) + std::cout << std::endl + << "Binding Utilities: AutoScaling input image to match model input dimensions..."; + + // Create a transform object with default parameters (no transform) + auto transform = BitmapTransform(); + transform.ScaledHeight(static_cast(height)); + transform.ScaledWidth(static_cast(width)); + transform.InterpolationMode(args.AutoScaleInterpMode()); + + // get the bitmap + return decoder + .GetSoftwareBitmapAsync(format, decoder.BitmapAlphaMode(), transform, + ExifOrientationMode::RespectExifOrientation, colorManagementMode) + .get(); + } + else + { + // get the bitmap + return decoder + .GetSoftwareBitmapAsync(format, decoder.BitmapAlphaMode(), BitmapTransform(), + ExifOrientationMode::RespectExifOrientation, colorManagementMode) + .get(); + } + } + catch (hresult_error hr) + { + printf(" Failed to create SoftwareBitmap! Please make sure that input image is within the model's " + "colorspace.\n"); + printf(" %ws\n", hr.message().c_str()); + exit(hr.code()); + } + } + + VideoFrame CreateVideoFrame(const SoftwareBitmap& softwareBitmap, InputBindingType inputBindingType, + InputDataType inputDataType, const IDirect3DDevice winrtDevice) + { + VideoFrame inputImage = VideoFrame::CreateWithSoftwareBitmap(softwareBitmap); + + if (inputBindingType == InputBindingType::GPU) + { + VideoFrame gpuImage = + winrtDevice + ? 
VideoFrame::CreateAsDirect3D11SurfaceBacked(TypeHelper::GetDirectXPixelFormat(inputDataType), + softwareBitmap.PixelWidth(), + softwareBitmap.PixelHeight(), winrtDevice) + : VideoFrame::CreateAsDirect3D11SurfaceBacked(TypeHelper::GetDirectXPixelFormat(inputDataType), + softwareBitmap.PixelWidth(), + softwareBitmap.PixelHeight()); + + inputImage.CopyToAsync(gpuImage).get(); + + return gpuImage; + } + + return inputImage; + } + + struct InputBufferDesc + { + uint8_t* elements; + uint32_t totalSizeInBytes; + uint32_t numChannelsPerElement; + uint32_t elementStrideInBytes; + bool isPlanar; + TensorKind channelFormat; + BitmapPixelFormat elementFormat; + + InputBufferDesc() + : elements(nullptr), totalSizeInBytes(0), numChannelsPerElement(0), elementStrideInBytes(0), isPlanar(0), + channelFormat(TensorKind::Undefined), elementFormat(BitmapPixelFormat::Unknown) + { + } + }; + + void ReadCSVIntoBuffer(const std::wstring& csvFilePath, InputBufferDesc& inputBufferDesc) + { + std::ifstream fileStream; + fileStream.open(csvFilePath); + if (!fileStream.is_open()) + { + ThrowFailure(L"BindingUtilities: could not open data file."); + } + + uint32_t pos = 0; + std::string line; + float_t* pData = (float_t*)inputBufferDesc.elements; + while (std::getline(fileStream, line, ',')) + { + *pData = std::stof(line); + ++pData; + + ++pos; + if (pos >= inputBufferDesc.totalSizeInBytes) + break; + } + + // Check to see if csv didn't fill in entire buffer and throw or fill with zeros? 
+ if (pos != (inputBufferDesc.totalSizeInBytes * inputBufferDesc.numChannelsPerElement) / + inputBufferDesc.elementStrideInBytes) + { + throw hresult_invalid_argument(L"CSV input size/shape is different from what model expects!"); + } + } + + // Roll the array correctly for the tensor + template + void CopyTensorFromBuffer(void* actualData, uint32_t tensorHeight, uint32_t tensorWidth, + const InputBufferDesc& inputBufferDesc, float scale, const std::vector& means, + const std::vector& stddevs) + { + using WriteType = typename TensorKindToPointerType::Type; + + WriteType* pDataOut = static_cast(actualData); + InputType* pDataIn = (InputType*)inputBufferDesc.elements; + uint32_t elementOffsetMultiplier = inputBufferDesc.isPlanar ? inputBufferDesc.numChannelsPerElement : 1; + uint32_t channelOffsetMultiplier = inputBufferDesc.isPlanar ? 1 : tensorHeight * tensorWidth; + for (uint32_t element = 0; element < tensorHeight * tensorWidth; ++element) + { + for (uint32_t channel = 0; channel < inputBufferDesc.numChannelsPerElement; ++channel) + { + pDataOut[element * elementOffsetMultiplier + channel * channelOffsetMultiplier] = + ConvertToPointerType(((pDataIn[channel] / scale) - means[channel]) / + stddevs[channel]); + } + pDataIn += inputBufferDesc.elementStrideInBytes / sizeof(InputType); + } + } + + template + static void GenerateRandomData(WriteType* data, uint32_t sizeInBytes, uint32_t maxValue) + { + static std::independent_bits_engine + randomBitsEngine; + randomBitsEngine.seed(seed++); + + WriteType* begin = data; + WriteType* end = reinterpret_cast(reinterpret_cast(data) + sizeInBytes); + while (begin <= end) + { + *begin = maxValue * static_cast(randomBitsEngine()) / (randomBitsEngine.max)(); + ++begin; + } + } + + template + static ITensor CreateTensor(const CommandLineArgs& args, const std::vector& tensorShape, + const InputBindingType inputBindingType, const InputBufferDesc& inputBufferDesc) + { + using TensorValue = typename TensorKindToValue::Type; + using 
WriteType = typename TensorKindToPointerType::Type; + + // Map the incoming Tensor as a TensorNative to get the actual data buffer. + auto tensorValue = TensorValue::Create(tensorShape); + + com_ptr spTensorValueNative; + tensorValue.as(spTensorValueNative); + + WriteType* actualData; + uint32_t actualSizeInBytes; + THROW_IF_FAILED(spTensorValueNative->GetBuffer(reinterpret_cast(&actualData), &actualSizeInBytes)); + + if (args.IsCSVInput() || args.IsImageInput()) + { + // Assumes NCHW + uint32_t channels = static_cast(tensorShape[1]); + uint32_t tensorHeight = static_cast(tensorShape[2]); + uint32_t tensorWidth = static_cast(tensorShape[3]); + + // Check to make sure the sizes are right + uint32_t inputElementCount = inputBufferDesc.totalSizeInBytes / inputBufferDesc.elementStrideInBytes; + uint32_t outputElementCount = actualSizeInBytes / (channels * sizeof(WriteType)); + if (inputElementCount != outputElementCount) + { + throw hresult_invalid_argument(L"Input size / shape is different from what the model expects"); + } + + float scale; + std::vector means = {}; + std::vector stddevs = {}; + + const auto& tensorizeArgs = args.TensorizeArgs(); + const auto& normalizeParams = tensorizeArgs.Normalize; + switch (tensorizeArgs.Func) + { + case TensorizeFuncs::Identity: + scale = 1.0f; + means.resize(channels, 0.0f); + stddevs.resize(channels, 1.0f); + break; + case TensorizeFuncs::Normalize: + switch (inputBufferDesc.elementFormat) + { + case BitmapPixelFormat::Gray8: + case BitmapPixelFormat::Gray16: + case BitmapPixelFormat::Rgba8: + case BitmapPixelFormat::Rgba16: + scale = normalizeParams.Scale; + means.resize(channels); + stddevs.resize(channels); + for (uint32_t i = 0; i < channels; ++i) + { + means[i] = normalizeParams.Means[i]; + stddevs[i] = normalizeParams.StdDevs[i]; + } + break; + case BitmapPixelFormat::Bgra8: + scale = normalizeParams.Scale; + means.resize(channels); + stddevs.resize(channels); + for (uint32_t i = 0; i < channels; ++i) + { + 
means[channels - 1 - i] = normalizeParams.Means[i]; + stddevs[channels - 1 - i] = normalizeParams.StdDevs[i]; + } + break; + + default: + throw hresult_invalid_argument( + L"CreateTensor: Unhandled SoftwareBitmap pixel format"); + } + break; + default: + throw hresult_invalid_argument(L"CreateTensor: Unknown Tensorize Function"); + } + + switch (inputBufferDesc.channelFormat) + { + case TensorKind::UInt8: + CopyTensorFromBuffer(actualData, tensorHeight, tensorWidth, inputBufferDesc, scale, + means, stddevs); + break; + case TensorKind::Float: + CopyTensorFromBuffer(actualData, tensorHeight, tensorWidth, inputBufferDesc, scale, + means, stddevs); + break; + default: + throw hresult_not_implemented(L"Creating Tensors for Input Images with unhandled channel format!"); + } + } + // Garbage Data + else if (args.IsGarbageDataRange()) + { + GenerateRandomData(actualData, actualSizeInBytes, args.GarbageDataMaxValue()); + } + + if (inputBindingType == InputBindingType::CPU) + { + return tensorValue; + } + else // GPU Tensor + { + com_ptr pGPUResource = nullptr; + try + { + // create the d3d device. + com_ptr pD3D12Device = nullptr; + D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_11_0, __uuidof(ID3D12Device), + reinterpret_cast(&pD3D12Device)); + + pD3D12Device->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(actualSizeInBytes, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), + D3D12_RESOURCE_STATE_COMMON, nullptr, __uuidof(ID3D12Resource), pGPUResource.put_void()); + if (!args.IsGarbageInput()) + { + com_ptr imageUploadHeap; + // Create the GPU upload buffer. + pD3D12Device->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(actualSizeInBytes), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + __uuidof(ID3D12Resource), imageUploadHeap.put_void()); + + // create the command queue. 
+ com_ptr dxQueue = nullptr; + D3D12_COMMAND_QUEUE_DESC commandQueueDesc = {}; + commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + pD3D12Device->CreateCommandQueue(&commandQueueDesc, __uuidof(ID3D12CommandQueue), + reinterpret_cast(&dxQueue)); + com_ptr devicefactory = + get_activation_factory(); + com_ptr<::IUnknown> spUnk; + devicefactory->CreateFromD3D12CommandQueue(dxQueue.get(), spUnk.put()); + + // Create ID3D12GraphicsCommandList and Allocator + D3D12_COMMAND_LIST_TYPE queuetype = dxQueue->GetDesc().Type; + com_ptr alloctor; + com_ptr cmdList; + pD3D12Device->CreateCommandAllocator(queuetype, winrt::guid_of(), + alloctor.put_void()); + pD3D12Device->CreateCommandList(0, queuetype, alloctor.get(), nullptr, + winrt::guid_of(), cmdList.put_void()); + + // Copy from Cpu to GPU + D3D12_SUBRESOURCE_DATA CPUData = {}; + CPUData.pData = actualData; + CPUData.RowPitch = actualSizeInBytes; + CPUData.SlicePitch = actualSizeInBytes; + UpdateSubresources(cmdList.get(), pGPUResource.get(), imageUploadHeap.get(), 0, 0, 1, &CPUData); + + // Close the command list and execute it to begin the initial GPU setup. + cmdList->Close(); + ID3D12CommandList* ppCommandLists[] = { cmdList.get() }; + dxQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); + + // Create Event + HANDLE directEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); + + // Create Fence + ::Microsoft::WRL::ComPtr spDirectFence = nullptr; + THROW_IF_FAILED(pD3D12Device->CreateFence(0, D3D12_FENCE_FLAG_NONE, + IID_PPV_ARGS(spDirectFence.ReleaseAndGetAddressOf()))); + // Adds fence to queue + THROW_IF_FAILED(dxQueue->Signal(spDirectFence.Get(), 1)); + THROW_IF_FAILED(spDirectFence->SetEventOnCompletion(1, directEvent)); + + // Wait for signal + DWORD retVal = WaitForSingleObject(directEvent, INFINITE); + if (retVal != WAIT_OBJECT_0) + { + THROW_IF_FAILED(E_UNEXPECTED); + } + } + } + catch (...) 
+ { + std::cout << "Couldn't create and copy CPU tensor resource to GPU resource" << std::endl; + throw; + } + com_ptr tensorfactory = get_activation_factory(); + com_ptr<::IUnknown> spUnkTensor; + tensorfactory->CreateFromD3D12Resource(pGPUResource.get(), const_cast(tensorShape.data()), + static_cast(tensorShape.size()), spUnkTensor.put()); + TensorValue returnTensor(nullptr); + spUnkTensor.try_as(returnTensor); + return returnTensor; + } + } + + // Process the descriptor to gather and normalize the shape + void ProcessDescriptor(const ILearningModelFeatureDescriptor& description, std::vector& shape, + TensorKind& tensorKind, InputBufferDesc& inputBufferDesc) + { + // Try Image Feature Descriptor + auto imageFeatureDescriptor = description.try_as(); + if (imageFeatureDescriptor) + { + int64_t channels; + inputBufferDesc.elementFormat = imageFeatureDescriptor.BitmapPixelFormat(); + switch (inputBufferDesc.elementFormat) + { + case BitmapPixelFormat::Gray8: + case BitmapPixelFormat::Gray16: + channels = 1; + break; + case BitmapPixelFormat::Bgra8: + case BitmapPixelFormat::Rgba16: + case BitmapPixelFormat::Rgba8: + channels = 3; + break; + default: + throw hresult_not_implemented(L"BitmapPixel format not yet handled by WinMLRunner."); + } + + tensorKind = TensorKind::Float; + shape.push_back(1); + shape.push_back(channels); + shape.push_back(static_cast(imageFeatureDescriptor.Height())); + shape.push_back(static_cast(imageFeatureDescriptor.Width())); + return; + } + + auto tensorDescriptor = description.try_as(); + if (tensorDescriptor) + { + IVectorView tensorShape = tensorDescriptor.Shape(); + for (uint32_t dim = 0; dim < tensorShape.Size(); dim++) + { + int64_t dimSize = tensorShape.GetAt(dim); + if (dimSize > 0) // If the dimension is greater than 0, then it is known. + { + shape.push_back(dimSize); + } + else // otherwise, make sure that the dimension is -1, representing free dimension. If not, then it's an + // invalid model. 
+ { + if (dimSize == -1) + { + shape.push_back(1); + } + else + { + throw hresult_invalid_argument(L"Failed to create a tensor with an unknown dimension of: " + + dimSize); + } + } + } + + tensorKind = tensorDescriptor.TensorKind(); + return; + } + + throw hresult_invalid_argument(L"ProcessDescriptor: Unknown desription type!"); + } // namespace BindingUtilities + + // Binds tensor floats, ints, doubles from CSV data. + ITensor CreateBindableTensor(const ILearningModelFeatureDescriptor& description, const std::wstring& imagePath, + const InputBindingType inputBindingType, const InputDataType inputDataType, + const CommandLineArgs& args, uint32_t iterationNum, + ColorManagementMode colorManagementMode) + { + InputBufferDesc inputBufferDesc = {}; + + std::vector shape = {}; + TensorKind tensorKind = TensorKind::Undefined; + ProcessDescriptor(description, shape, tensorKind, inputBufferDesc); + + SoftwareBitmap softwareBitmap(nullptr); + if (args.IsCSVInput()) + { + inputBufferDesc.channelFormat = TensorKind::Float; + inputBufferDesc.isPlanar = true; + + // Assumes shape is in the format of 'NCHW' + inputBufferDesc.numChannelsPerElement = static_cast(shape[1]); + + // Assumes no gaps in the input csv file + inputBufferDesc.elementStrideInBytes = inputBufferDesc.numChannelsPerElement * sizeof(float_t); + + inputBufferDesc.totalSizeInBytes = sizeof(float_t); + for (uint32_t i = 0; i < shape.size(); ++i) + inputBufferDesc.totalSizeInBytes *= static_cast(shape[i]); + + inputBufferDesc.elements = new uint8_t[inputBufferDesc.totalSizeInBytes]; + + ReadCSVIntoBuffer(args.CsvPath(), inputBufferDesc); + } + else if (args.IsImageInput()) + { + softwareBitmap = + LoadImageFile(description, inputDataType, imagePath.c_str(), args, iterationNum, colorManagementMode); + + // Get Pointers to the SoftwareBitmap data buffers + const BitmapBuffer sbBitmapBuffer(softwareBitmap.LockBuffer(BitmapBufferAccessMode::Read)); + winrt::Windows::Foundation::IMemoryBufferReference sbReference = 
sbBitmapBuffer.CreateReference(); + auto sbByteAccess = sbReference.as<::Windows::Foundation::IMemoryBufferByteAccess>(); + winrt::check_hresult(sbByteAccess->GetBuffer(&inputBufferDesc.elements, &inputBufferDesc.totalSizeInBytes)); + + inputBufferDesc.isPlanar = false; + inputBufferDesc.elementFormat = softwareBitmap.BitmapPixelFormat(); + switch (inputBufferDesc.elementFormat) + { + case BitmapPixelFormat::Gray8: + inputBufferDesc.channelFormat = TensorKind::UInt8; + inputBufferDesc.numChannelsPerElement = 1; + inputBufferDesc.elementStrideInBytes = sizeof(uint8_t); + break; + case BitmapPixelFormat::Gray16: + inputBufferDesc.channelFormat = TensorKind::UInt16; + inputBufferDesc.numChannelsPerElement = 1; + inputBufferDesc.elementStrideInBytes = sizeof(uint16_t); + break; + case BitmapPixelFormat::Bgra8: + inputBufferDesc.channelFormat = TensorKind::UInt8; + inputBufferDesc.numChannelsPerElement = 3; + inputBufferDesc.elementStrideInBytes = 4 * sizeof(uint8_t); + break; + case BitmapPixelFormat::Rgba8: + inputBufferDesc.channelFormat = TensorKind::UInt8; + inputBufferDesc.numChannelsPerElement = 3; + inputBufferDesc.elementStrideInBytes = 4 * sizeof(uint8_t); + break; + case BitmapPixelFormat::Rgba16: + inputBufferDesc.channelFormat = TensorKind::UInt16; + inputBufferDesc.numChannelsPerElement = 3; + inputBufferDesc.elementStrideInBytes = 4 * sizeof(uint16_t); + break; + default: + throw hresult_invalid_argument(L"Unknown BitmapPixelFormat in input image."); + } + } + + switch (tensorKind) + { + case TensorKind::Undefined: + { + std::cout << "BindingUtilities: TensorKind is undefined." 
<< std::endl; + throw hresult_invalid_argument(); + } + case TensorKind::Float: + { + return CreateTensor(args, shape, inputBindingType, inputBufferDesc); + } + break; + case TensorKind::Float16: + { + return CreateTensor(args, shape, inputBindingType, inputBufferDesc); + } + break; + case TensorKind::Double: + { + return CreateTensor(args, shape, inputBindingType, inputBufferDesc); + } + break; + case TensorKind::Int8: + { + return CreateTensor(args, shape, inputBindingType, inputBufferDesc); + } + break; + case TensorKind::UInt8: + { + return CreateTensor(args, shape, inputBindingType, inputBufferDesc); + } + break; + case TensorKind::Int16: + { + return CreateTensor(args, shape, inputBindingType, inputBufferDesc); + } + break; + case TensorKind::UInt16: + { + return CreateTensor(args, shape, inputBindingType, inputBufferDesc); + } + break; + case TensorKind::Int32: + { + return CreateTensor(args, shape, inputBindingType, inputBufferDesc); + } + break; + case TensorKind::UInt32: + { + return CreateTensor(args, shape, inputBindingType, inputBufferDesc); + } + break; + case TensorKind::Int64: + { + return CreateTensor(args, shape, inputBindingType, inputBufferDesc); + } + break; + case TensorKind::UInt64: + { + return CreateTensor(args, shape, inputBindingType, inputBufferDesc); + } + break; + } + std::cout << "BindingUtilities: TensorKind has not been implemented." << std::endl; + throw hresult_not_implemented(); + } + + ImageFeatureValue CreateBindableImage(const ILearningModelFeatureDescriptor& featureDescriptor, + const std::wstring& imagePath, InputBindingType inputBindingType, + InputDataType inputDataType, const IDirect3DDevice winrtDevice, + const CommandLineArgs& args, uint32_t iterationNum, + ColorManagementMode colorManagementMode) + { + auto softwareBitmap = imagePath.empty() ? 
GenerateGarbageImage(featureDescriptor, inputDataType) + : LoadImageFile(featureDescriptor, inputDataType, imagePath.c_str(), + args, iterationNum, colorManagementMode); + auto videoFrame = CreateVideoFrame(softwareBitmap, inputBindingType, inputDataType, winrtDevice); + return ImageFeatureValue::CreateFromVideoFrame(videoFrame); + } + + template + void OutputSequenceBinding(IMapView results, hstring name) + { + auto map = results.Lookup(name).as>>().GetAt(0); + auto iter = map.First(); + + K maxKey = -1; + V maxVal = -1; + + while (iter.HasCurrent()) + { + auto pair = iter.Current(); + if (pair.Value() > maxKey) + { + maxVal = pair.Value(); + maxKey = pair.Key(); + } + iter.MoveNext(); + } + std::cout << " " << maxKey << " " << maxVal << std::endl; + } + + void PrintOrSaveEvaluationResults(const LearningModel& model, const CommandLineArgs& args, + const IMapView& results, + OutputHelper& output, int iterationNum) + { + for (auto&& desc : model.OutputFeatures()) + { + if (desc.Kind() == LearningModelFeatureKind::Tensor) + { + std::wstring name(desc.Name()); + if (args.IsSaveTensor() && args.SaveTensorMode() == L"First" && iterationNum > 0) + { + return; + } + if (args.IsSaveTensor()) + { + output.SetDefaultCSVIterationResult(iterationNum, args, name); + } + void* tensor; + uint32_t uCapacity; + com_ptr itn = results.Lookup(desc.Name()).as(); + HRESULT(itn->GetBuffer(reinterpret_cast(&tensor), &uCapacity)); + int size = 0; + unsigned int topK = args.TopK(); + std::vector> maxKValues; + std::ofstream fout; + if (args.IsSaveTensor()) + { + fout.open(output.GetCsvFileNamePerIterationResult(), std::ios_base::app); + fout << "Index" + << "," + << "Value" << std::endl; + } + TensorFeatureDescriptor tensorDescriptor = desc.as(); + TensorKind tensorKind = tensorDescriptor.TensorKind(); + switch (tensorKind) + { + case TensorKind::String: + { + if (!args.IsGarbageInput()) + { + auto resultVector = results.Lookup(desc.Name()).as().GetAsVectorView(); + auto output = 
resultVector.GetAt(0).data(); + std::wcout << " Result: " << output << std::endl; + } + } + break; + case TensorKind::Float16: + { + output.ProcessTensorResult(args, tensor, uCapacity, maxKValues, fout, topK); + } + break; + case TensorKind::Float: + { + output.ProcessTensorResult(args, tensor, uCapacity, maxKValues, fout, topK); + } + break; + case TensorKind::Int64: + { + auto resultVector = results.Lookup(desc.Name()).as().GetAsVectorView(); + if (!args.IsGarbageInput()) + { + auto output = resultVector.GetAt(0); + std::wcout << " Result: " << output << std::endl; + } + } + break; + default: + { + std::cout << "BindingUtilities: output type not implemented."; + } + break; + } + if (args.IsSaveTensor()) + { + fout.close(); + for (auto& pair : maxKValues) + { + auto maxValue = pair.first; + auto maxIndex = pair.second; + std::string iterationResult = + "Index: " + std::to_string(maxIndex) + "; Value: " + std::to_string(maxValue); + output.SaveResult(iterationNum, iterationResult, + static_cast(hash_data(tensor, uCapacity))); + } + } + if (!args.IsGarbageInput() && iterationNum == 0) + { + std::wcout << L"Outputting top " << args.TopK() << L" values" << std::endl; + std::wcout << L"Feature Name: " << name << std::endl; + for (auto& pair : maxKValues) + { + auto maxValue = pair.first; + auto maxIndex = pair.second; + std::wcout << L" index: " << maxIndex << L", value: " << maxValue << std::endl; + } + } + } + else if (desc.Kind() == LearningModelFeatureKind::Sequence) + { + auto seqDescriptor = desc.as(); + auto mapDescriptor = seqDescriptor.ElementDescriptor().as(); + auto keyKind = mapDescriptor.KeyKind(); + auto valueKind = mapDescriptor.ValueDescriptor(); + auto tensorKind = valueKind.as().TensorKind(); + switch (keyKind) + { + case TensorKind::Int64: + { + OutputSequenceBinding(results, desc.Name()); + } + break; + case TensorKind::Float: + { + OutputSequenceBinding(results, desc.Name()); + } + break; + } + } + } + } +}; // namespace BindingUtilities diff --git 
a/Tools/WinMLRunner/src/BindingUtilities.h b/Tools/WinMLRunner/src/BindingUtilities.h index cb6fae76..934af937 100644 --- a/Tools/WinMLRunner/src/BindingUtilities.h +++ b/Tools/WinMLRunner/src/BindingUtilities.h @@ -1,975 +1,25 @@ #pragma once -#include -#include -#include "Common.h" -#include "Windows.AI.Machinelearning.Native.h" -#include "d3dx12.h" -#include "MemoryBuffer.h" -using namespace winrt::Windows::Media; -using namespace winrt::Windows::Storage; -using namespace winrt::Windows::Storage::Streams; -using namespace winrt::Windows::AI::MachineLearning; -using namespace winrt::Windows::Foundation::Collections; -using namespace winrt::Windows::Graphics::DirectX; -using namespace winrt::Windows::Graphics::Imaging; -using namespace winrt::Windows::Graphics::DirectX::Direct3D11; -using namespace DirectX::PackedVector; - -template struct TensorKindToPointerType -{ - static_assert(true, "No TensorKind mapped for given type!"); -}; -template <> struct TensorKindToPointerType -{ - typedef uint8_t Type; -}; -template <> struct TensorKindToPointerType -{ - typedef int8_t Type; -}; -template <> struct TensorKindToPointerType -{ - typedef uint16_t Type; -}; -template <> struct TensorKindToPointerType -{ - typedef int16_t Type; -}; -template <> struct TensorKindToPointerType -{ - typedef uint32_t Type; -}; -template <> struct TensorKindToPointerType -{ - typedef int32_t Type; -}; -template <> struct TensorKindToPointerType -{ - typedef uint64_t Type; -}; -template <> struct TensorKindToPointerType -{ - typedef int64_t Type; -}; -template <> struct TensorKindToPointerType -{ - typedef boolean Type; -}; -template <> struct TensorKindToPointerType -{ - typedef double Type; -}; -template <> struct TensorKindToPointerType -{ - typedef float Type; -}; -template <> struct TensorKindToPointerType -{ - typedef HALF Type; -}; -template <> struct TensorKindToPointerType -{ - typedef winrt::hstring Type; -}; - -template struct TensorKindToValue -{ - static_assert(true, "No 
TensorKind mapped for given type!"); -}; -template <> struct TensorKindToValue -{ - typedef TensorUInt8Bit Type; -}; -template <> struct TensorKindToValue -{ - typedef TensorInt8Bit Type; -}; -template <> struct TensorKindToValue -{ - typedef TensorUInt16Bit Type; -}; -template <> struct TensorKindToValue -{ - typedef TensorInt16Bit Type; -}; -template <> struct TensorKindToValue -{ - typedef TensorUInt32Bit Type; -}; -template <> struct TensorKindToValue -{ - typedef TensorInt32Bit Type; -}; -template <> struct TensorKindToValue -{ - typedef TensorUInt64Bit Type; -}; -template <> struct TensorKindToValue -{ - typedef TensorInt64Bit Type; -}; -template <> struct TensorKindToValue -{ - typedef TensorBoolean Type; -}; -template <> struct TensorKindToValue -{ - typedef TensorDouble Type; -}; -template <> struct TensorKindToValue -{ - typedef TensorFloat Type; -}; -template <> struct TensorKindToValue -{ - typedef TensorFloat16Bit Type; -}; -template <> struct TensorKindToValue -{ - typedef TensorString Type; -}; - -template ToType ConvertToPointerType(FromType value) -{ - static_assert(true, "No TensorKind mapped for given type!"); - return 0; -}; -template <> uint8_t ConvertToPointerType(float value) { return static_cast(value); }; -template <> int8_t ConvertToPointerType(float value) { return static_cast(value); }; -template <> uint16_t ConvertToPointerType(float value) { return static_cast(value); }; -template <> int16_t ConvertToPointerType(float value) { return static_cast(value); }; -template <> uint32_t ConvertToPointerType(float value) { return static_cast(value); }; -template <> int32_t ConvertToPointerType(float value) { return static_cast(value); }; -template <> uint64_t ConvertToPointerType(float value) { return static_cast(value); }; -template <> int64_t ConvertToPointerType(float value) { return static_cast(value); }; -template <> boolean ConvertToPointerType(float value) { return static_cast(value); }; -template <> double ConvertToPointerType(double 
value) { return static_cast(value); }; -template <> float ConvertToPointerType(float value) { return static_cast(value); }; -template <> HALF ConvertToPointerType(float value) { return XMConvertFloatToHalf(value); }; -template <> winrt::hstring ConvertToPointerType(winrt::hstring value) -{ - return static_cast(value); -}; - -static ColorManagementMode GetColorManagementMode(const LearningModel& model) - -{ - // Get model color space gamma - hstring gammaSpace = L""; - try - { - gammaSpace = model.Metadata().Lookup(L"Image.ColorSpaceGamma"); - } - catch (...) - { - printf(" Model does not have color space gamma information. Will color manage to sRGB by default...\n"); - } - if (gammaSpace == L"" || _wcsicmp(gammaSpace.c_str(), L"SRGB") == 0) - { - return ColorManagementMode::ColorManageToSRgb; - } - // Due diligence should be done to make sure that the input image is within the model's colorspace. There are - // multiple non-sRGB color spaces. - printf(" Model metadata indicates that color gamma space is : %ws. 
Will not manage color space to sRGB...\n", - gammaSpace.c_str()); - return ColorManagementMode::DoNotColorManage; -} +#include +ColorManagementMode GetColorManagementMode(const LearningModel& model); void GetHeightAndWidthFromLearningModelFeatureDescriptor(const ILearningModelFeatureDescriptor& modelFeatureDescriptor, - uint64_t& width, uint64_t& height) -{ - if (modelFeatureDescriptor.Kind() == LearningModelFeatureKind::Tensor) - { - // We assume NCHW - auto tensorDescriptor = modelFeatureDescriptor.try_as(); - if (tensorDescriptor.Shape().Size() != 4) - { - throw hresult_invalid_argument(L"Cannot generate arbitrary image for tensor input of dimensions: " + - tensorDescriptor.Shape().Size()); - } - height = tensorDescriptor.Shape().GetAt(2); - width = tensorDescriptor.Shape().GetAt(3); - } - else if (modelFeatureDescriptor.Kind() == LearningModelFeatureKind::Image) - { - auto imageDescriptor = modelFeatureDescriptor.try_as(); - height = imageDescriptor.Height(); - width = imageDescriptor.Width(); - } - else - { - throw hresult_not_implemented( - L"Generating arbitrary image not supported for input types that aren't tensor or image."); - } -} + uint64_t& width, uint64_t& height); namespace BindingUtilities { - static unsigned int seed = 0; - static std::independent_bits_engine randomBitsEngineChar; - - SoftwareBitmap GenerateGarbageImage(const ILearningModelFeatureDescriptor& modelFeatureDescriptor, - InputDataType inputDataType) - { - assert(inputDataType != InputDataType::Tensor); - uint64_t width = 0; - uint64_t height = 0; - GetHeightAndWidthFromLearningModelFeatureDescriptor(modelFeatureDescriptor, width, height); - - // We have to create RGBA8 or BGRA8 images, so we need 4 channels - uint32_t totalByteSize = static_cast(width) * static_cast(height) * 4; - - // Generate values for the image based on a seed - std::vector data(totalByteSize); - randomBitsEngineChar.seed(seed++); - std::generate(data.begin(), data.end(), randomBitsEngineChar); - - // Write the 
values to a buffer - winrt::array_view dataView(data); - InMemoryRandomAccessStream dataStream; - DataWriter dataWriter(dataStream); - dataWriter.WriteBytes(dataView); - IBuffer buffer = dataWriter.DetachBuffer(); - - // Create the software bitmap - return SoftwareBitmap::CreateCopyFromBuffer(buffer, TypeHelper::GetBitmapPixelFormat(inputDataType), - static_cast(width), static_cast(height)); - } - - SoftwareBitmap LoadImageFile(const ILearningModelFeatureDescriptor& modelFeatureDescriptor, - const InputDataType inputDataType, const hstring& filePath, - const CommandLineArgs& args, uint32_t iterationNum, - ColorManagementMode colorManagementMode) - { - // We assume NCHW and NCDHW - uint64_t width = 0; - uint64_t height = 0; - GetHeightAndWidthFromLearningModelFeatureDescriptor(modelFeatureDescriptor, width, height); - IRandomAccessStream stream; - BitmapDecoder decoder = NULL; - try - { - // open the file - StorageFile file = StorageFile::GetFileFromPathAsync(filePath).get(); - // get a stream on it - stream = file.OpenAsync(FileAccessMode::Read).get(); - // Create the decoder from the stream - decoder = BitmapDecoder::CreateAsync(stream).get(); - } - catch (hresult_error hr) - { - printf(" Failed to load the image file, make sure you are using fully qualified paths\r\n"); - printf(" %ws\n", hr.message().c_str()); - exit(hr.code()); - } - BitmapPixelFormat format = inputDataType == InputDataType::Tensor - ? 
decoder.BitmapPixelFormat() - : TypeHelper::GetBitmapPixelFormat(inputDataType); - try - { - // If input dimensions are different from tensor input, then scale / crop while reading - if (args.IsAutoScale() && (decoder.PixelHeight() != height || decoder.PixelWidth() != width)) - { - if (!args.TerseOutput() || iterationNum == 0) - std::cout << std::endl - << "Binding Utilities: AutoScaling input image to match model input dimensions..."; - - // Create a transform object with default parameters (no transform) - auto transform = BitmapTransform(); - transform.ScaledHeight(static_cast(height)); - transform.ScaledWidth(static_cast(width)); - transform.InterpolationMode(args.AutoScaleInterpMode()); - - // get the bitmap - return decoder - .GetSoftwareBitmapAsync(format, decoder.BitmapAlphaMode(), transform, - ExifOrientationMode::RespectExifOrientation, colorManagementMode).get(); - } - else - { - // get the bitmap - return decoder - .GetSoftwareBitmapAsync(format, decoder.BitmapAlphaMode(), BitmapTransform(), - ExifOrientationMode::RespectExifOrientation, colorManagementMode).get(); - } - } - catch (hresult_error hr) - { - printf(" Failed to create SoftwareBitmap! Please make sure that input image is within the model's " - "colorspace.\n"); - printf(" %ws\n", hr.message().c_str()); - exit(hr.code()); - } - } - - VideoFrame CreateVideoFrame(const SoftwareBitmap& softwareBitmap, InputBindingType inputBindingType, - InputDataType inputDataType, const IDirect3DDevice winrtDevice) - { - VideoFrame inputImage = VideoFrame::CreateWithSoftwareBitmap(softwareBitmap); - - if (inputBindingType == InputBindingType::GPU) - { - VideoFrame gpuImage = - winrtDevice - ? 
VideoFrame::CreateAsDirect3D11SurfaceBacked(TypeHelper::GetDirectXPixelFormat(inputDataType), - softwareBitmap.PixelWidth(), - softwareBitmap.PixelHeight(), winrtDevice) - : VideoFrame::CreateAsDirect3D11SurfaceBacked(TypeHelper::GetDirectXPixelFormat(inputDataType), - softwareBitmap.PixelWidth(), - softwareBitmap.PixelHeight()); - - inputImage.CopyToAsync(gpuImage).get(); - - return gpuImage; - } - - return inputImage; - } - - struct InputBufferDesc - { - uint8_t* elements; - uint32_t totalSizeInBytes; - uint32_t numChannelsPerElement; - uint32_t elementStrideInBytes; - bool isPlanar; - TensorKind channelFormat; - BitmapPixelFormat elementFormat; - - InputBufferDesc() - : elements(nullptr), totalSizeInBytes(0), numChannelsPerElement(0), elementStrideInBytes(0), isPlanar(0), - channelFormat(TensorKind::Undefined), elementFormat(BitmapPixelFormat::Unknown) - { - } - }; - - void ReadCSVIntoBuffer(const std::wstring& csvFilePath, InputBufferDesc& inputBufferDesc) - { - std::ifstream fileStream; - fileStream.open(csvFilePath); - if (!fileStream.is_open()) - { - ThrowFailure(L"BindingUtilities: could not open data file."); - } - - uint32_t pos = 0; - std::string line; - float_t* pData = (float_t*)inputBufferDesc.elements; - while (std::getline(fileStream, line, ',')) - { - *pData = std::stof(line); - ++pData; - - ++pos; - if (pos >= inputBufferDesc.totalSizeInBytes) - break; - } - - // Check to see if csv didn't fill in entire buffer and throw or fill with zeros? 
- if (pos != (inputBufferDesc.totalSizeInBytes * inputBufferDesc.numChannelsPerElement) / - inputBufferDesc.elementStrideInBytes) - { - throw hresult_invalid_argument(L"CSV input size/shape is different from what model expects!"); - } - } - - // Roll the array correctly for the tensor - template - void CopyTensorFromBuffer(void* actualData, uint32_t tensorHeight, uint32_t tensorWidth, - const InputBufferDesc& inputBufferDesc, float scale, const std::vector& means, - const std::vector& stddevs) - { - using WriteType = typename TensorKindToPointerType::Type; - - WriteType* pDataOut = static_cast(actualData); - InputType* pDataIn = (InputType*)inputBufferDesc.elements; - uint32_t elementOffsetMultiplier = inputBufferDesc.isPlanar ? inputBufferDesc.numChannelsPerElement : 1; - uint32_t channelOffsetMultiplier = inputBufferDesc.isPlanar ? 1 : tensorHeight * tensorWidth; - for (uint32_t element = 0; element < tensorHeight * tensorWidth; ++element) - { - for (uint32_t channel = 0; channel < inputBufferDesc.numChannelsPerElement; ++channel) - { - pDataOut[element * elementOffsetMultiplier + channel * channelOffsetMultiplier] = - ConvertToPointerType(((pDataIn[channel] / scale) - means[channel]) / - stddevs[channel]); - } - pDataIn += inputBufferDesc.elementStrideInBytes / sizeof(InputType); - } - } - - template - static void GenerateRandomData(WriteType* data, uint32_t sizeInBytes, uint32_t maxValue) - { - static std::independent_bits_engine - randomBitsEngine; - randomBitsEngine.seed(seed++); - - WriteType* begin = data; - WriteType* end = reinterpret_cast(reinterpret_cast(data) + sizeInBytes); - while (begin <= end) - { - *begin = maxValue * static_cast(randomBitsEngine()) / (randomBitsEngine.max)(); - ++begin; - } - } - - template - static ITensor CreateTensor(const CommandLineArgs& args, const std::vector& tensorShape, - const InputBindingType inputBindingType, const InputBufferDesc& inputBufferDesc) - { - using TensorValue = typename TensorKindToValue::Type; - using 
WriteType = typename TensorKindToPointerType::Type; - - // Map the incoming Tensor as a TensorNative to get the actual data buffer. - auto tensorValue = TensorValue::Create(tensorShape); - - com_ptr spTensorValueNative; - tensorValue.as(spTensorValueNative); - - WriteType* actualData; - uint32_t actualSizeInBytes; - THROW_IF_FAILED(spTensorValueNative->GetBuffer(reinterpret_cast(&actualData), &actualSizeInBytes)); - - if (args.IsCSVInput() || args.IsImageInput()) - { - // Assumes NCHW - uint32_t channels = static_cast(tensorShape[1]); - uint32_t tensorHeight = static_cast(tensorShape[2]); - uint32_t tensorWidth = static_cast(tensorShape[3]); - - // Check to make sure the sizes are right - uint32_t inputElementCount = inputBufferDesc.totalSizeInBytes / inputBufferDesc.elementStrideInBytes; - uint32_t outputElementCount = actualSizeInBytes / (channels * sizeof(WriteType)); - if (inputElementCount != outputElementCount) - { - throw hresult_invalid_argument(L"Input size / shape is different from what the model expects"); - } - - float scale; - std::vector means = {}; - std::vector stddevs = {}; - - const auto& tensorizeArgs = args.TensorizeArgs(); - const auto& normalizeParams = tensorizeArgs.Normalize; - switch (tensorizeArgs.Func) - { - case TensorizeFuncs::Identity: - scale = 1.0f; - means.resize(channels, 0.0f); - stddevs.resize(channels, 1.0f); - break; - case TensorizeFuncs::Normalize: - switch (inputBufferDesc.elementFormat) - { - case BitmapPixelFormat::Gray8: - case BitmapPixelFormat::Gray16: - case BitmapPixelFormat::Rgba8: - case BitmapPixelFormat::Rgba16: - scale = normalizeParams.Scale; - means.resize(channels); - stddevs.resize(channels); - for (uint32_t i = 0; i < channels; ++i) - { - means[i] = normalizeParams.Means[i]; - stddevs[i] = normalizeParams.StdDevs[i]; - } - break; - case BitmapPixelFormat::Bgra8: - scale = normalizeParams.Scale; - means.resize(channels); - stddevs.resize(channels); - for (uint32_t i = 0; i < channels; ++i) - { - 
means[channels - 1 - i] = normalizeParams.Means[i]; - stddevs[channels - 1 - i] = normalizeParams.StdDevs[i]; - } - break; - - default: - throw hresult_invalid_argument( - L"CreateTensor: Unhandled SoftwareBitmap pixel format"); - } - break; - default: - throw hresult_invalid_argument(L"CreateTensor: Unknown Tensorize Function"); - } - - switch (inputBufferDesc.channelFormat) - { - case TensorKind::UInt8: - CopyTensorFromBuffer(actualData, tensorHeight, tensorWidth, inputBufferDesc, scale, - means, stddevs); - break; - case TensorKind::Float: - CopyTensorFromBuffer(actualData, tensorHeight, tensorWidth, inputBufferDesc, scale, - means, stddevs); - break; - default: - throw hresult_not_implemented(L"Creating Tensors for Input Images with unhandled channel format!"); - } - } - // Garbage Data - else if (args.IsGarbageDataRange()) - { - GenerateRandomData(actualData, actualSizeInBytes, args.GarbageDataMaxValue()); - } - - if (inputBindingType == InputBindingType::CPU) - { - return tensorValue; - } - else // GPU Tensor - { - com_ptr pGPUResource = nullptr; - try - { - // create the d3d device. - com_ptr pD3D12Device = nullptr; - D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_11_0, __uuidof(ID3D12Device), - reinterpret_cast(&pD3D12Device)); - - pD3D12Device->CreateCommittedResource( - &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, - &CD3DX12_RESOURCE_DESC::Buffer(actualSizeInBytes, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), - D3D12_RESOURCE_STATE_COMMON, nullptr, __uuidof(ID3D12Resource), pGPUResource.put_void()); - if (!args.IsGarbageInput()) - { - com_ptr imageUploadHeap; - // Create the GPU upload buffer. - pD3D12Device->CreateCommittedResource( - &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), D3D12_HEAP_FLAG_NONE, - &CD3DX12_RESOURCE_DESC::Buffer(actualSizeInBytes), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, - __uuidof(ID3D12Resource), imageUploadHeap.put_void()); - - // create the command queue. 
- com_ptr dxQueue = nullptr; - D3D12_COMMAND_QUEUE_DESC commandQueueDesc = {}; - commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; - pD3D12Device->CreateCommandQueue(&commandQueueDesc, __uuidof(ID3D12CommandQueue), - reinterpret_cast(&dxQueue)); - com_ptr devicefactory = - get_activation_factory(); - com_ptr<::IUnknown> spUnk; - devicefactory->CreateFromD3D12CommandQueue(dxQueue.get(), spUnk.put()); - - // Create ID3D12GraphicsCommandList and Allocator - D3D12_COMMAND_LIST_TYPE queuetype = dxQueue->GetDesc().Type; - com_ptr alloctor; - com_ptr cmdList; - pD3D12Device->CreateCommandAllocator(queuetype, winrt::guid_of(), - alloctor.put_void()); - pD3D12Device->CreateCommandList(0, queuetype, alloctor.get(), nullptr, - winrt::guid_of(), cmdList.put_void()); - - // Copy from Cpu to GPU - D3D12_SUBRESOURCE_DATA CPUData = {}; - CPUData.pData = actualData; - CPUData.RowPitch = actualSizeInBytes; - CPUData.SlicePitch = actualSizeInBytes; - UpdateSubresources(cmdList.get(), pGPUResource.get(), imageUploadHeap.get(), 0, 0, 1, &CPUData); - - // Close the command list and execute it to begin the initial GPU setup. - cmdList->Close(); - ID3D12CommandList* ppCommandLists[] = { cmdList.get() }; - dxQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); - - // Create Event - HANDLE directEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); - - // Create Fence - Microsoft::WRL::ComPtr spDirectFence = nullptr; - THROW_IF_FAILED(pD3D12Device->CreateFence(0, D3D12_FENCE_FLAG_NONE, - IID_PPV_ARGS(spDirectFence.ReleaseAndGetAddressOf()))); - // Adds fence to queue - THROW_IF_FAILED(dxQueue->Signal(spDirectFence.Get(), 1)); - THROW_IF_FAILED(spDirectFence->SetEventOnCompletion(1, directEvent)); - - // Wait for signal - DWORD retVal = WaitForSingleObject(directEvent, INFINITE); - if (retVal != WAIT_OBJECT_0) - { - THROW_IF_FAILED(E_UNEXPECTED); - } - } - } - catch (...) 
- { - std::cout << "Couldn't create and copy CPU tensor resource to GPU resource" << std::endl; - throw; - } - com_ptr tensorfactory = get_activation_factory(); - com_ptr<::IUnknown> spUnkTensor; - tensorfactory->CreateFromD3D12Resource(pGPUResource.get(), const_cast(tensorShape.data()), - static_cast(tensorShape.size()), spUnkTensor.put()); - TensorValue returnTensor(nullptr); - spUnkTensor.try_as(returnTensor); - return returnTensor; - } - } - - // Process the descriptor to gather and normalize the shape - void ProcessDescriptor(const ILearningModelFeatureDescriptor& description, std::vector& shape, - TensorKind& tensorKind, InputBufferDesc& inputBufferDesc) - { - // Try Image Feature Descriptor - auto imageFeatureDescriptor = description.try_as(); - if (imageFeatureDescriptor) - { - int64_t channels; - inputBufferDesc.elementFormat = imageFeatureDescriptor.BitmapPixelFormat(); - switch (inputBufferDesc.elementFormat) - { - case BitmapPixelFormat::Gray8: - case BitmapPixelFormat::Gray16: - channels = 1; - break; - case BitmapPixelFormat::Bgra8: - case BitmapPixelFormat::Rgba16: - case BitmapPixelFormat::Rgba8: - channels = 3; - break; - default: - throw hresult_not_implemented(L"BitmapPixel format not yet handled by WinMLRunner."); - } - - tensorKind = TensorKind::Float; - shape.push_back(1); - shape.push_back(channels); - shape.push_back(static_cast(imageFeatureDescriptor.Height())); - shape.push_back(static_cast(imageFeatureDescriptor.Width())); - return; - } - - auto tensorDescriptor = description.try_as(); - if (tensorDescriptor) - { - IVectorView tensorShape = tensorDescriptor.Shape(); - for (uint32_t dim = 0; dim < tensorShape.Size(); dim++) - { - int64_t dimSize = tensorShape.GetAt(dim); - if (dimSize > 0) // If the dimension is greater than 0, then it is known. - { - shape.push_back(dimSize); - } - else // otherwise, make sure that the dimension is -1, representing free dimension. If not, then it's an - // invalid model. 
- { - if (dimSize == -1) - { - shape.push_back(1); - } - else - { - throw hresult_invalid_argument(L"Failed to create a tensor with an unknown dimension of: " + - dimSize); - } - } - } - - tensorKind = tensorDescriptor.TensorKind(); - return; - } - - throw hresult_invalid_argument(L"ProcessDescriptor: Unknown desription type!"); - } // namespace BindingUtilities - - // Binds tensor floats, ints, doubles from CSV data. ITensor CreateBindableTensor(const ILearningModelFeatureDescriptor& description, const std::wstring& imagePath, const InputBindingType inputBindingType, const InputDataType inputDataType, const CommandLineArgs& args, uint32_t iterationNum, - ColorManagementMode colorManagementMode) - { - InputBufferDesc inputBufferDesc = {}; - - std::vector shape = {}; - TensorKind tensorKind = TensorKind::Undefined; - ProcessDescriptor(description, shape, tensorKind, inputBufferDesc); - - SoftwareBitmap softwareBitmap(nullptr); - if (args.IsCSVInput()) - { - inputBufferDesc.channelFormat = TensorKind::Float; - inputBufferDesc.isPlanar = true; - - // Assumes shape is in the format of 'NCHW' - inputBufferDesc.numChannelsPerElement = static_cast(shape[1]); - - // Assumes no gaps in the input csv file - inputBufferDesc.elementStrideInBytes = inputBufferDesc.numChannelsPerElement * sizeof(float_t); - - inputBufferDesc.totalSizeInBytes = sizeof(float_t); - for (uint32_t i = 0; i < shape.size(); ++i) - inputBufferDesc.totalSizeInBytes *= static_cast(shape[i]); - - inputBufferDesc.elements = new uint8_t[inputBufferDesc.totalSizeInBytes]; - - ReadCSVIntoBuffer(args.CsvPath(), inputBufferDesc); - } - else if (args.IsImageInput()) - { - softwareBitmap = - LoadImageFile(description, inputDataType, imagePath.c_str(), args, iterationNum, colorManagementMode); - - // Get Pointers to the SoftwareBitmap data buffers - const BitmapBuffer sbBitmapBuffer(softwareBitmap.LockBuffer(BitmapBufferAccessMode::Read)); - winrt::Windows::Foundation::IMemoryBufferReference sbReference = 
sbBitmapBuffer.CreateReference(); - auto sbByteAccess = sbReference.as<::Windows::Foundation::IMemoryBufferByteAccess>(); - winrt::check_hresult(sbByteAccess->GetBuffer(&inputBufferDesc.elements, &inputBufferDesc.totalSizeInBytes)); - - inputBufferDesc.isPlanar = false; - inputBufferDesc.elementFormat = softwareBitmap.BitmapPixelFormat(); - switch (inputBufferDesc.elementFormat) - { - case BitmapPixelFormat::Gray8: - inputBufferDesc.channelFormat = TensorKind::UInt8; - inputBufferDesc.numChannelsPerElement = 1; - inputBufferDesc.elementStrideInBytes = sizeof(uint8_t); - break; - case BitmapPixelFormat::Gray16: - inputBufferDesc.channelFormat = TensorKind::UInt16; - inputBufferDesc.numChannelsPerElement = 1; - inputBufferDesc.elementStrideInBytes = sizeof(uint16_t); - break; - case BitmapPixelFormat::Bgra8: - inputBufferDesc.channelFormat = TensorKind::UInt8; - inputBufferDesc.numChannelsPerElement = 3; - inputBufferDesc.elementStrideInBytes = 4 * sizeof(uint8_t); - break; - case BitmapPixelFormat::Rgba8: - inputBufferDesc.channelFormat = TensorKind::UInt8; - inputBufferDesc.numChannelsPerElement = 3; - inputBufferDesc.elementStrideInBytes = 4 * sizeof(uint8_t); - break; - case BitmapPixelFormat::Rgba16: - inputBufferDesc.channelFormat = TensorKind::UInt16; - inputBufferDesc.numChannelsPerElement = 3; - inputBufferDesc.elementStrideInBytes = 4 * sizeof(uint16_t); - break; - default: - throw hresult_invalid_argument(L"Unknown BitmapPixelFormat in input image."); - } - } - - switch (tensorKind) - { - case TensorKind::Undefined: - { - std::cout << "BindingUtilities: TensorKind is undefined." 
<< std::endl; - throw hresult_invalid_argument(); - } - case TensorKind::Float: - { - return CreateTensor(args, shape, inputBindingType, inputBufferDesc); - } - break; - case TensorKind::Float16: - { - return CreateTensor(args, shape, inputBindingType, inputBufferDesc); - } - break; - case TensorKind::Double: - { - return CreateTensor(args, shape, inputBindingType, inputBufferDesc); - } - break; - case TensorKind::Int8: - { - return CreateTensor(args, shape, inputBindingType, inputBufferDesc); - } - break; - case TensorKind::UInt8: - { - return CreateTensor(args, shape, inputBindingType, inputBufferDesc); - } - break; - case TensorKind::Int16: - { - return CreateTensor(args, shape, inputBindingType, inputBufferDesc); - } - break; - case TensorKind::UInt16: - { - return CreateTensor(args, shape, inputBindingType, inputBufferDesc); - } - break; - case TensorKind::Int32: - { - return CreateTensor(args, shape, inputBindingType, inputBufferDesc); - } - break; - case TensorKind::UInt32: - { - return CreateTensor(args, shape, inputBindingType, inputBufferDesc); - } - break; - case TensorKind::Int64: - { - return CreateTensor(args, shape, inputBindingType, inputBufferDesc); - } - break; - case TensorKind::UInt64: - { - return CreateTensor(args, shape, inputBindingType, inputBufferDesc); - } - break; - } - std::cout << "BindingUtilities: TensorKind has not been implemented." << std::endl; - throw hresult_not_implemented(); - } + ColorManagementMode colorManagementMode); ImageFeatureValue CreateBindableImage(const ILearningModelFeatureDescriptor& featureDescriptor, const std::wstring& imagePath, InputBindingType inputBindingType, - InputDataType inputDataType, const IDirect3DDevice winrtDevice, + InputDataType inputDataType, const winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice winrtDevice, const CommandLineArgs& args, uint32_t iterationNum, - ColorManagementMode colorManagementMode) - { - auto softwareBitmap = imagePath.empty() ? 
GenerateGarbageImage(featureDescriptor, inputDataType) - : LoadImageFile(featureDescriptor, inputDataType, imagePath.c_str(), - args, iterationNum, colorManagementMode); - auto videoFrame = CreateVideoFrame(softwareBitmap, inputBindingType, inputDataType, winrtDevice); - return ImageFeatureValue::CreateFromVideoFrame(videoFrame); - } - - template - void OutputSequenceBinding(IMapView results, hstring name) - { - auto map = results.Lookup(name).as>>().GetAt(0); - auto iter = map.First(); - - K maxKey = -1; - V maxVal = -1; - - while (iter.HasCurrent()) - { - auto pair = iter.Current(); - if (pair.Value() > maxKey) - { - maxVal = pair.Value(); - maxKey = pair.Key(); - } - iter.MoveNext(); - } - std::cout << " " << maxKey << " " << maxVal << std::endl; - } + ColorManagementMode colorManagementMode); void PrintOrSaveEvaluationResults(const LearningModel& model, const CommandLineArgs& args, - const IMapView& results, - OutputHelper& output, int iterationNum) - { - for (auto&& desc : model.OutputFeatures()) - { - if (desc.Kind() == LearningModelFeatureKind::Tensor) - { - std::wstring name(desc.Name()); - if (args.IsSaveTensor() && args.SaveTensorMode() == L"First" && iterationNum > 0) - { - return; - } - if (args.IsSaveTensor()) - { - output.SetDefaultCSVIterationResult(iterationNum, args, name); - } - void* tensor; - uint32_t uCapacity; - com_ptr itn = results.Lookup(desc.Name()).as(); - HRESULT(itn->GetBuffer(reinterpret_cast(&tensor), &uCapacity)); - int size = 0; - unsigned int topK = args.TopK(); - std::vector> maxKValues; - std::ofstream fout; - if (args.IsSaveTensor()) - { - fout.open(output.getCsvFileNamePerIterationResult(), std::ios_base::app); - fout << "Index" - << "," - << "Value" << std::endl; - } - TensorFeatureDescriptor tensorDescriptor = desc.as(); - TensorKind tensorKind = tensorDescriptor.TensorKind(); - switch (tensorKind) - { - case TensorKind::String: - { - if (!args.IsGarbageInput()) - { - auto resultVector = 
results.Lookup(desc.Name()).as().GetAsVectorView(); - auto output = resultVector.GetAt(0).data(); - std::wcout << " Result: " << output << std::endl; - } - } - break; - case TensorKind::Float16: - { - output.ProcessTensorResult(args, tensor, uCapacity, maxKValues, fout, topK); - } - break; - case TensorKind::Float: - { - output.ProcessTensorResult(args, tensor, uCapacity, maxKValues, fout, topK); - } - break; - case TensorKind::Int64: - { - auto resultVector = results.Lookup(desc.Name()).as().GetAsVectorView(); - if (!args.IsGarbageInput()) - { - auto output = resultVector.GetAt(0); - std::wcout << " Result: " << output << std::endl; - } - } - break; - default: - { - std::cout << "BindingUtilities: output type not implemented."; - } - break; - } - if (args.IsSaveTensor()) - { - fout.close(); - for (auto& pair : maxKValues) - { - auto maxValue = pair.first; - auto maxIndex = pair.second; - std::string iterationResult = - "Index: " + std::to_string(maxIndex) + "; Value: " + std::to_string(maxValue); - output.SaveResult(iterationNum, iterationResult, - static_cast(hash_data(tensor, uCapacity))); - } - } - if (!args.IsGarbageInput() && iterationNum == 0) - { - std::wcout << L"Outputting top " << args.TopK() << L" values" << std::endl; - std::wcout << L"Feature Name: " << name << std::endl; - for (auto& pair : maxKValues) - { - auto maxValue = pair.first; - auto maxIndex = pair.second; - std::wcout << L" index: " << maxIndex << L", value: " << maxValue << std::endl; - } - } - } - else if (desc.Kind() == LearningModelFeatureKind::Sequence) - { - auto seqDescriptor = desc.as(); - auto mapDescriptor = seqDescriptor.ElementDescriptor().as(); - auto keyKind = mapDescriptor.KeyKind(); - auto valueKind = mapDescriptor.ValueDescriptor(); - auto tensorKind = valueKind.as().TensorKind(); - switch (keyKind) - { - case TensorKind::Int64: - { - OutputSequenceBinding(results, desc.Name()); - } - break; - case TensorKind::Float: - { - OutputSequenceBinding(results, desc.Name()); - } - 
break; - } - } - } - } -}; // namespace BindingUtilities + const winrt::Windows::Foundation::Collections::IMapView& results, + OutputHelper& output, int iterationNum); + +} \ No newline at end of file diff --git a/Tools/WinMLRunner/src/CommandLineArgs.cpp b/Tools/WinMLRunner/src/CommandLineArgs.cpp index 5210976f..3710ed2a 100644 --- a/Tools/WinMLRunner/src/CommandLineArgs.cpp +++ b/Tools/WinMLRunner/src/CommandLineArgs.cpp @@ -7,17 +7,26 @@ #include #include #include "Filehelper.h" - -using namespace Windows::AI::MachineLearning; - void CommandLineArgs::PrintUsage() { +#ifdef USE_WINML_NUGET + std::cout << "MicrosoftML Runner" << std::endl; +#else std::cout << "WinML Runner" << std::endl; +#endif std::cout << " ---------------------------------------------------------------" << std::endl; - std::cout << "WinmlRunner.exe <-model | -folder> [options]" << std::endl; +#ifdef USE_WINML_NUGET + std::cout << "MicrosoftMLRunner.exe <-model | -folder> [options]" << std::endl; +#else + std::cout << "WinMLRunner.exe <-model | -folder> [options]" << std::endl; +#endif std::cout << std::endl; std::cout << "options: " << std::endl; +#ifdef USE_WINML_NUGET + std::cout << " -version: prints the version information for this build of MicrosoftMLRunner.exe" << std::endl; +#else std::cout << " -version: prints the version information for this build of WinMLRunner.exe" << std::endl; +#endif std::cout << " -CPU : run model on default CPU" << std::endl; std::cout << " -GPU : run model on default GPU" << std::endl; std::cout << " -GPUHighPerformance : run model on GPU with highest performance" << std::endl; @@ -242,7 +251,12 @@ CommandLineArgs::CommandLineArgs(const std::vector& args) if (!IsDebuggerPresent()) { throw hresult_invalid_argument( - L"-DebugEvaluate flag should only be used when WinMLRunner is under a user-mode debugger!"); +#ifdef USE_WINML_NUGET + L"-DebugEvaluate flag should only be used when MicrosoftMLRunner is under a user-mode debugger!" 
+#else + L"-DebugEvaluate flag should only be used when WinMLRunner is under a user-mode debugger!" +#endif + ); } ToggleEvaluationDebugOutput(true); } @@ -471,8 +485,11 @@ void CommandLineArgs::SetupOutputDirectories(const std::wstring& sBaseOutputPath if (m_perfOutputPath.empty()) { if (sPerfOutputPath.empty()) +#ifdef USE_WINML_NUGET + PerfOutputPath = L"MicrosoftMLRunner[" + oss.str() + L"].csv"; +#else PerfOutputPath = L"WinMLRunner[" + oss.str() + L"].csv"; - +#endif PerfOutputPath = BaseOutputPath / PerfOutputPath; m_perfOutputPath = PerfOutputPath.c_str(); } diff --git a/Tools/WinMLRunner/src/CommandLineArgs.h b/Tools/WinMLRunner/src/CommandLineArgs.h index fd717af6..041d706a 100644 --- a/Tools/WinMLRunner/src/CommandLineArgs.h +++ b/Tools/WinMLRunner/src/CommandLineArgs.h @@ -1,6 +1,7 @@ #pragma once #include "Common.h" - +#include +#include "TypeHelper.h" enum TensorizeFuncs { Identity = 0, diff --git a/Tools/WinMLRunner/src/Common.h b/Tools/WinMLRunner/src/Common.h index 544b0690..481fd069 100644 --- a/Tools/WinMLRunner/src/Common.h +++ b/Tools/WinMLRunner/src/Common.h @@ -4,7 +4,11 @@ #endif // unknown.h needs to be inlcuded before any winrt headers #include +#ifdef USE_WINML_NUGET +#include +#else #include +#endif #include #include #include @@ -22,8 +26,6 @@ #include #include #include -#include "TypeHelper.h" -#include "TimerHelper.h" #include "DirectXPackedVector.h" #ifndef BLOCK_DXCORE diff --git a/Tools/WinMLRunner/src/Concurrency.cpp b/Tools/WinMLRunner/src/Concurrency.cpp index 55a9e2be..7ee42b00 100644 --- a/Tools/WinMLRunner/src/Concurrency.cpp +++ b/Tools/WinMLRunner/src/Concurrency.cpp @@ -7,7 +7,11 @@ #include "ThreadPool.h" using namespace winrt; +#ifdef USE_WINML_NUGET +using namespace winrt::Microsoft::AI::MachineLearning; +#else using namespace winrt::Windows::AI::MachineLearning; +#endif void load_model(const std::wstring& path, bool print_info) { diff --git a/Tools/WinMLRunner/src/LearningModelDeviceHelper.cpp 
b/Tools/WinMLRunner/src/LearningModelDeviceHelper.cpp index 682646fb..007e92ec 100644 --- a/Tools/WinMLRunner/src/LearningModelDeviceHelper.cpp +++ b/Tools/WinMLRunner/src/LearningModelDeviceHelper.cpp @@ -1,12 +1,12 @@ -#include "LearningModelDeviceHelper.h" -#include "TypeHelper.h" -#include "Common.h" #include "d3d11.h" #include "d3dx12.h" +#include "LearningModelDeviceHelper.h" +#include "Common.h" #include #include "Windows.AI.MachineLearning.Native.h" #include #include "OutputHelper.h" +#include "TypeHelper.h" using namespace winrt::Windows::Graphics::DirectX::Direct3D11; #ifdef DXCORE_SUPPORTED_BUILD diff --git a/Tools/WinMLRunner/src/OutputHelper.cpp b/Tools/WinMLRunner/src/OutputHelper.cpp new file mode 100644 index 00000000..8ef34e65 --- /dev/null +++ b/Tools/WinMLRunner/src/OutputHelper.cpp @@ -0,0 +1,1197 @@ +#include "Common.h" +#include "CommandLineArgs.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "TimerHelper.h" +#include "LearningModelDeviceHelper.h" +#include "OutputHelper.h" + +#ifdef USE_WINML_NUGET +using namespace winrt::Microsoft::AI::MachineLearning; +#else +using namespace winrt::Windows::AI::MachineLearning; +#endif +using namespace winrt::Windows::Storage::Streams; +using namespace ::Windows::Graphics::DirectX::Direct3D11; +using namespace winrt::Windows::Graphics::DirectX::Direct3D11; +using namespace DirectX::PackedVector; + +void OutputHelper::PrintLoadingInfo(const std::wstring& modelPath) const +{ + wprintf(L"Loading model (path = %s)...\n", modelPath.c_str()); +} + +void OutputHelper::PrintBindingInfo(uint32_t iteration, DeviceType deviceType, InputBindingType inputBindingType, + InputDataType inputDataType, DeviceCreationLocation deviceCreationLocation, + const std::string& status) const +{ + printf("Binding (device = %s, iteration = %d, inputBinding = %s, inputDataType = %s, deviceCreationLocation = " + "%s)...%s\n", + TypeHelper::Stringify(deviceType).c_str(), 
iteration, TypeHelper::Stringify(inputBindingType).c_str(), + TypeHelper::Stringify(inputDataType).c_str(), TypeHelper::Stringify(deviceCreationLocation).c_str(), + status.c_str()); +} + +void OutputHelper::PrintEvaluatingInfo(uint32_t iteration, DeviceType deviceType, InputBindingType inputBindingType, + InputDataType inputDataType, DeviceCreationLocation deviceCreationLocation, + const std::string& status) const +{ + printf("Evaluating (device = %s, iteration = %d, inputBinding = %s, inputDataType = %s, deviceCreationLocation " + "= %s)...%s\n", + TypeHelper::Stringify(deviceType).c_str(), iteration, TypeHelper::Stringify(inputBindingType).c_str(), + TypeHelper::Stringify(inputDataType).c_str(), TypeHelper::Stringify(deviceCreationLocation).c_str(), + status.c_str()); +} + +void OutputHelper::PrintModelInfo(const std::wstring& modelPath, const LearningModel& model) const +{ + std::cout << "=================================================================" << std::endl; + std::wcout << "Name: " << model.Name().c_str() << std::endl; + std::wcout << "Author: " << model.Author().c_str() << std::endl; + std::wcout << "Version: " << model.Version() << std::endl; + std::wcout << "Domain: " << model.Domain().c_str() << std::endl; + std::wcout << "Description: " << model.Description().c_str() << std::endl; + std::wcout << "Path: " << modelPath << std::endl; + std::cout << "Support FP16: " << std::boolalpha << doesModelContainFP16(model) << std::endl; + + std::cout << std::endl; + // print out information about input of model + std::cout << "Input Feature Info:" << std::endl; + for (auto&& inputFeature : model.InputFeatures()) + { + PrintFeatureDescriptorInfo(inputFeature); + } + // print out information about output of model + std::cout << "Output Feature Info:" << std::endl; + for (auto&& outputFeature : model.OutputFeatures()) + { + PrintFeatureDescriptorInfo(outputFeature); + } + std::cout << "=================================================================" << 
std::endl; + std::cout << std::endl; +} + +void OutputHelper::PrintFeatureDescriptorInfo(const ILearningModelFeatureDescriptor& descriptor) const +{ + // IMPORTANT: This learningModelFeatureKind array needs to match the "enum class + // LearningModelFeatureKind" idl in Windows.AI.MachineLearning.0.h + const std::string learningModelFeatureKind[] = { + "Tensor", + "Sequence", + "Map", + "Image", + }; + std::wstring name(descriptor.Name()); + std::wcout << "Name: " << name << std::endl; + std::wcout << "Feature Kind: " << FeatureDescriptorToString(descriptor) << std::endl; + std::cout << std::endl; +} + +void OutputHelper::PrintHardwareInfo() const +{ + std::cout << "WinML Runner" << std::endl; + std::cout << "Printing available GPUs with DXGI.." << std::endl; + com_ptr factory; + CreateDXGIFactory1(__uuidof(IDXGIFactory6), factory.put_void()); + std::vector> validAdapters; + for (UINT i = 0;; ++i) + { + com_ptr spAdapter; + if (factory->EnumAdapters1(i, spAdapter.put()) != S_OK) + { + break; + } + DXGI_ADAPTER_DESC1 pDesc; + spAdapter->GetDesc1(&pDesc); + + // is a software adapter + if (pDesc.Flags == DXGI_ADAPTER_FLAG_SOFTWARE || (pDesc.VendorId == 0x1414 && pDesc.DeviceId == 0x8c)) + { + continue; + } + // valid GPU adapter + else + { + printf("Index: %d, Description: %ls\n", static_cast(validAdapters.size()), pDesc.Description); + validAdapters.push_back(spAdapter); + } + } + std::cout << std::endl; +} + +void OutputHelper::PrintLearningModelDevice(const LearningModelDeviceWithMetadata& device) +{ + if (device.DeviceType == DeviceType::CPU) + { + std::cout << "\nCreated LearningModelDevice with CPU device" << std::endl; + return; + } + + IDirect3DDevice d3dDevice = device.LearningModelDevice.Direct3D11Device(); + com_ptr dxgi; + dxgi = d3dDevice.try_as(); + if (dxgi) + { + com_ptr dxgiDevice; + dxgi->GetInterface(__uuidof(IDXGIDevice), dxgiDevice.put_void()); + com_ptr adapter; + dxgiDevice->GetAdapter(adapter.put()); + DXGI_ADAPTER_DESC description; + if 
(SUCCEEDED(adapter->GetDesc(&description))) + { + std::wcout << L"\nCreated LearningModelDevice with GPU: " << description.Description << std::endl; + } + } + else + { + std::cout << "Failed to Print Learning Model Device Information" << std::endl; + } +} + +void OutputHelper::PrintResults(const Profiler& profiler, uint32_t numIterations, DeviceType deviceType, + InputBindingType inputBindingType, InputDataType inputDataType, + DeviceCreationLocation deviceCreationLocation, bool isPerformanceConsoleOutputVerbose) const +{ + double loadTime = profiler[LOAD_MODEL].GetAverage(CounterType::TIMER); + double createSessionTime = profiler[CREATE_SESSION].GetAverage(CounterType::TIMER); + + double averageBindTime = profiler[BIND_VALUE].GetAverage(CounterType::TIMER); + double stdevBindTime = profiler[BIND_VALUE].GetStdev(CounterType::TIMER); + double minBindTime = profiler[BIND_VALUE].GetMin(CounterType::TIMER); + double maxBindTime = profiler[BIND_VALUE].GetMax(CounterType::TIMER); + double firstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::TIMER); + + double averageEvalTime = profiler[EVAL_MODEL].GetAverage(CounterType::TIMER); + double stdevEvalTime = profiler[EVAL_MODEL].GetStdev(CounterType::TIMER); + double minEvalTime = profiler[EVAL_MODEL].GetMin(CounterType::TIMER); + double maxEvalTime = profiler[EVAL_MODEL].GetMax(CounterType::TIMER); + double firstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::TIMER); + + double firstLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::WORKING_SET_USAGE); + double firstLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); + double firstLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); + double firstLoadPeakWorkingSetUsage = profiler[LOAD_MODEL].GetAverage(CounterType::PEAK_WORKING_SET_USAGE); + + double firstSessionCreationWorkingSetMemoryUsage = + 
profiler[CREATE_SESSION].GetAverage(CounterType::WORKING_SET_USAGE); + double firstSessionCreationSharedMemoryUsage = + profiler[CREATE_SESSION].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); + double firstSessionCreationDedicatedMemoryUsage = + profiler[CREATE_SESSION].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); + double firstSessionPeakWorkingSetUsage = + profiler[CREATE_SESSION].GetAverage(CounterType::PEAK_WORKING_SET_USAGE); + + double averageBindMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::WORKING_SET_USAGE); + double stdevBindMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::WORKING_SET_USAGE); + double minBindMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::WORKING_SET_USAGE); + double maxBindMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::WORKING_SET_USAGE); + double firstBindMemoryUsage = profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE); + double firstBindPeakMemoryUsage = + profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::PEAK_WORKING_SET_USAGE); + + double averageEvalMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::WORKING_SET_USAGE); + double stdevEvalMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::WORKING_SET_USAGE); + double minEvalMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::WORKING_SET_USAGE); + double maxEvalMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::WORKING_SET_USAGE); + double firstEvalMemoryUsage = profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE); + double firstEvalPeakMemoryUsage = + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::PEAK_WORKING_SET_USAGE); + + double averageBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); + double stdevBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE); + double minBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE); + double 
maxBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE); + double firstBindDedicatedMemoryUsage = + profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); + + double averageEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); + double stdevEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE); + double minEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE); + double maxEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE); + double firstEvalDedicatedMemoryUsage = + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); + + double averageBindSharedMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); + double stdevBindSharedMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::GPU_SHARED_MEM_USAGE); + double minBindSharedMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::GPU_SHARED_MEM_USAGE); + double maxBindSharedMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::GPU_SHARED_MEM_USAGE); + double firstBindSharedMemoryUsage = + profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); + + double averageEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); + double stdevEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::GPU_SHARED_MEM_USAGE); + double minEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::GPU_SHARED_MEM_USAGE); + double maxEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::GPU_SHARED_MEM_USAGE); + double firstEvalSharedMemoryUsage = + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); + + double firstIterationWorkingSetMemoryUsage = + profiler[LOAD_MODEL].GetAverage(CounterType::WORKING_SET_USAGE) + + 
profiler[CREATE_SESSION].GetAverage(CounterType::WORKING_SET_USAGE) + + profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE) + + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE); + + double firstIterationSharedMemoryUsage = + profiler[LOAD_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE) + + profiler[CREATE_SESSION].GetAverage(CounterType::GPU_SHARED_MEM_USAGE) + + profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE) + + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); + + double firstIterationDedicatedMemoryUsage = + profiler[LOAD_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE) + + profiler[CREATE_SESSION].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE) + + profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE) + + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); + + double firstIterationPeakWorkingSet = firstLoadPeakWorkingSetUsage + firstSessionPeakWorkingSetUsage + + firstBindPeakMemoryUsage + firstEvalPeakMemoryUsage; + + printf("\nResults (device = %s, numIterations = %d, inputBinding = %s, inputDataType = %s, " + "deviceCreationLocation = %s):\n", + TypeHelper::Stringify(deviceType).c_str(), numIterations, + TypeHelper::Stringify(inputBindingType).c_str(), TypeHelper::Stringify(inputDataType).c_str(), + TypeHelper::Stringify(deviceCreationLocation).c_str()); + + std::cout << "\nFirst Iteration Performance (load, bind, session creation, and evaluate): " << std::endl; + std::cout << " Load: " << loadTime << " ms" << std::endl; + std::cout << " Bind: " << firstBindTime << " ms" << std::endl; + std::cout << " Session Creation: " << createSessionTime << " ms" << std::endl; + std::cout << " Evaluate: " << firstEvalTime << " ms" << std::endl; + + if (isPerformanceConsoleOutputVerbose) + { + std::cout << "\n Working Set Memory usage (load): " << firstLoadWorkingSetMemoryUsage << " MB" + << std::endl; 
+ std::cout << " Working Set Memory usage (session creation): " << firstSessionCreationWorkingSetMemoryUsage + << " MB" << std::endl; + std::cout << " Working Set Memory usage (bind): " << firstBindMemoryUsage << " MB" << std::endl; + } + else + { + std::cout << std::endl; + } + std::cout << " Working Set Memory usage (evaluate): " << firstEvalMemoryUsage << " MB" << std::endl; + std::cout << " Working Set Memory usage (load, bind, session creation, and evaluate): " + << firstIterationWorkingSetMemoryUsage << " MB" << std::endl; + + if (isPerformanceConsoleOutputVerbose) + { + std::cout << std::endl; + std::cout << " Peak Working Set Memory Difference (from start to load): " << firstLoadPeakWorkingSetUsage + << " MB" << std::endl; + std::cout << " Peak Working Set Memory Difference (from model load to session creation): " + << firstSessionPeakWorkingSetUsage << " MB" << std::endl; + std::cout << " Peak Working Set Memory Difference (from session to bind): " << firstBindPeakMemoryUsage + << " MB" << std::endl; + std::cout << " Peak Working Set Memory Difference (from bind to evaluate): " << firstEvalPeakMemoryUsage + << " MB" << std::endl; + } + + std::cout << " Peak Working Set Memory Difference (load, bind, session creation, and evaluate): " + << firstIterationPeakWorkingSet << " MB" << std::endl; + + if (isPerformanceConsoleOutputVerbose) + { + std::cout << "\n Dedicated Memory usage (load): " << firstLoadDedicatedMemoryUsage << " MB" << std::endl; + std::cout << " Dedicated Memory usage (session creation): " << firstSessionCreationDedicatedMemoryUsage + << " MB" << std::endl; + std::cout << " Dedicated Memory usage (bind): " << firstBindDedicatedMemoryUsage << " MB" << std::endl; + } + else + { + std::cout << std::endl; + } + std::cout << " Dedicated Memory usage (evaluate): " << firstEvalDedicatedMemoryUsage << " MB" << std::endl; + std::cout << " Dedicated Memory usage (load, bind, session creation, and evaluate): " + << firstIterationDedicatedMemoryUsage << " 
MB" << std::endl; + + if (isPerformanceConsoleOutputVerbose) + { + std::cout << "\n Shared Memory usage (load): " << firstLoadSharedMemoryUsage << " MB" << std::endl; + std::cout << " Shared Memory usage (session creation): " << firstSessionCreationSharedMemoryUsage << " MB" + << std::endl; + std::cout << " Shared Memory usage (bind): " << firstBindSharedMemoryUsage << " MB" << std::endl; + } + else + { + std::cout << std::endl; + } + std::cout << " Shared Memory usage (evaluate): " << firstEvalSharedMemoryUsage << " MB" << std::endl; + std::cout << " Shared Memory usage (load, bind, session creation, and evaluate): " + << firstIterationSharedMemoryUsage << " MB" << std::endl; + + if (numIterations > 1) + { + printf("\nAverage Performance excluding first iteration. Iterations %d to %d. (Iterations greater than 1 " + "only bind and evaluate)\n", + 2, numIterations); + std::cout << " Average Bind: " << averageBindTime << " ms" << std::endl; + if (isPerformanceConsoleOutputVerbose) + { + std::cout << " Minimum Bind: " << minBindTime << " ms" << std::endl; + std::cout << " Maximum Bind: " << maxBindTime << " ms" << std::endl; + std::cout << " Standard Deviation Bind: " << stdevBindTime << " ms" << std::endl; + } + std::cout << " Average Evaluate: " << averageEvalTime << " ms" << std::endl; + if (isPerformanceConsoleOutputVerbose) + { + std::cout << " Minimum Evaluate: " << minEvalTime << " ms" << std::endl; + std::cout << " Maximum Evaluate: " << maxEvalTime << " ms" << std::endl; + std::cout << " Standard Deviation Evaluate: " << stdevEvalTime << " ms" << std::endl; + } + + std::cout << "\n Average Working Set Memory usage (bind): " << averageBindMemoryUsage << " MB" + << std::endl; + if (isPerformanceConsoleOutputVerbose) + { + std::cout << " Min Working Set Memory usage (bind): " << minBindMemoryUsage << " MB" << std::endl; + std::cout << " Max Working Set Memory usage (bind): " << maxBindMemoryUsage << " MB" << std::endl; + std::cout << " Standard Deviation Working 
Set Memory usage (bind): " << stdevBindMemoryUsage << " MB" + << std::endl; + } + std::cout << " Average Working Set Memory usage (evaluate): " << averageEvalMemoryUsage << " MB" + << std::endl; + if (isPerformanceConsoleOutputVerbose) + { + std::cout << " Min Working Set Memory usage (evaluate): " << minEvalMemoryUsage << " MB" << std::endl; + std::cout << " Max Working Set Memory usage (evaluate): " << maxEvalMemoryUsage << " MB" << std::endl; + std::cout << " Standard Deviation Working Set Memory usage (evaluate): " << stdevEvalMemoryUsage + << " MB" << std::endl; + } + + std::cout << "\n Average Dedicated Memory usage (bind): " << averageBindDedicatedMemoryUsage << " MB" + << std::endl; + if (isPerformanceConsoleOutputVerbose) + { + std::cout << " Min Dedicated Memory usage (bind): " << minBindDedicatedMemoryUsage << " MB" + << std::endl; + std::cout << " Max Dedicated Memory usage (bind): " << maxBindDedicatedMemoryUsage << " MB" + << std::endl; + std::cout << " Standard Deviation Working Set Memory usage (evaluate): " + << stdevBindDedicatedMemoryUsage << " MB" << std::endl; + } + std::cout << " Average Dedicated Memory usage (evaluate): " << averageEvalDedicatedMemoryUsage << " MB" + << std::endl; + if (isPerformanceConsoleOutputVerbose) + { + std::cout << " Min Dedicated Memory usage (evaluate): " << minEvalDedicatedMemoryUsage << " MB" + << std::endl; + std::cout << " Max Dedicated Memory usage (evaluate): " << maxEvalDedicatedMemoryUsage << " MB" + << std::endl; + std::cout << " Standard Deviation Dedicated Memory usage (evaluate): " << stdevEvalDedicatedMemoryUsage + << " MB" << std::endl; + } + + std::cout << "\n Average Shared Memory usage (bind): " << averageBindSharedMemoryUsage << " MB" + << std::endl; + if (isPerformanceConsoleOutputVerbose) + { + std::cout << " Min Shared Memory usage (bind): " << minBindSharedMemoryUsage << " MB" << std::endl; + std::cout << " Max Shared Memory usage (bind): " << maxBindSharedMemoryUsage << " MB" << std::endl; + 
std::cout << " Standard Deviation Shared Memory usage (bind): " << stdevBindSharedMemoryUsage << " MB" + << std::endl; + } + std::cout << " Average Shared Memory usage (evaluate): " << averageEvalSharedMemoryUsage << " MB" + << std::endl; + if (isPerformanceConsoleOutputVerbose) + { + std::cout << " Min Shared Memory usage (evaluate): " << minEvalSharedMemoryUsage << " MB" << std::endl; + std::cout << " Max Shared Memory usage (evaluate): " << maxEvalSharedMemoryUsage << " MB" << std::endl; + std::cout << " Standard Deviation Shared Memory usage (evaluate): " << stdevEvalSharedMemoryUsage + << " MB" << std::endl; + } + } + std::cout << std::endl << std::endl << std::endl; +} + +std::wstring OutputHelper::FeatureDescriptorToString(const ILearningModelFeatureDescriptor& descriptor) +{ + switch (descriptor.Kind()) + { + case LearningModelFeatureKind::Tensor: + { + auto tensorDescriptor = descriptor.as(); + return TypeHelper::Stringify(tensorDescriptor.TensorKind()); + } + case LearningModelFeatureKind::Image: + { + auto imageDescriptor = descriptor.as(); + std::wstring str = L"Image (Height: " + std::to_wstring(imageDescriptor.Height()) + L", Width: " + + std::to_wstring(imageDescriptor.Width()) + L")"; + return str; + } + case LearningModelFeatureKind::Map: + { + auto mapDescriptor = descriptor.as(); + std::wstring str = L"Map<" + TypeHelper::Stringify(mapDescriptor.KeyKind()) + L","; + str += FeatureDescriptorToString(mapDescriptor.ValueDescriptor()); + str += L">"; + return str; + } + case LearningModelFeatureKind::Sequence: + { + auto sequenceDescriptor = descriptor.as(); + std::wstring str = L"List<" + FeatureDescriptorToString(sequenceDescriptor.ElementDescriptor()) + L">"; + return str; + } + default: + return (L"Invalid feature %s.", descriptor.Name().c_str()); + } +} + +bool OutputHelper::doesDescriptorContainFP16(const ILearningModelFeatureDescriptor& descriptor) +{ + switch (descriptor.Kind()) + { + case LearningModelFeatureKind::Tensor: + { + return 
descriptor.as().TensorKind() == TensorKind::Float16; + } + break; + case LearningModelFeatureKind::Map: + { + auto mapDescriptor = descriptor.as(); + if (mapDescriptor.KeyKind() == TensorKind::Float16) + { + return true; + } + return doesDescriptorContainFP16(mapDescriptor.ValueDescriptor()); + } + break; + case LearningModelFeatureKind::Sequence: + { + return doesDescriptorContainFP16(descriptor.as().ElementDescriptor()); + } + break; + default: + { + return false; + } + } +} + +bool OutputHelper::doesModelContainFP16(const LearningModel& model) +{ + for (auto&& inputFeature : model.InputFeatures()) + { + if (doesDescriptorContainFP16(inputFeature)) + { + return true; + } + } + return false; +} + +void OutputHelper::SaveLoadTimes(Profiler& profiler, uint32_t iterNum) +{ + m_clockLoadTimes[iterNum] = profiler[LOAD_MODEL].GetClockTime(); +} + +void OutputHelper::SaveBindTimes(Profiler& profiler, uint32_t iterNum) +{ + m_clockBindTimes[iterNum] = + (iterNum == 0) ? profiler[BIND_VALUE_FIRST_RUN].GetClockTime() : profiler[BIND_VALUE].GetClockTime(); +} + +void OutputHelper::SaveEvalPerformance(Profiler& profiler, uint32_t iterNum) +{ + enum WINML_MODEL_TEST_PERF eval = (iterNum == 0) ? 
EVAL_MODEL_FIRST_RUN : EVAL_MODEL; + m_clockEvalTimes[iterNum] = profiler[eval].GetClockTime(); + m_CPUWorkingDiff[iterNum] = profiler[eval].GetCpuWorkingDiff(); + m_CPUWorkingStart[iterNum] = profiler[eval].GetCpuWorkingStart(); + m_GPUSharedDiff[iterNum] = profiler[eval].GetGpuSharedDiff(); + m_GPUSharedStart[iterNum] = profiler[eval].GetGpuSharedStart(); + m_GPUDedicatedDiff[iterNum] = profiler[eval].GetGpuDedicatedDiff(); +} + +void OutputHelper::SaveResult(uint32_t iterationNum, std::string result, int hashcode) +{ + m_outputResult[iterationNum] = result; + m_outputTensorHash[iterationNum] = hashcode; +} + +void OutputHelper::SetDefaultPerIterationFolder(const std::wstring& folderName) +{ + m_folderNamePerIteration = folderName; + if (std::filesystem::create_directories(m_folderNamePerIteration.c_str()) != 0) + std::wcout << L"Folder [" + m_folderNamePerIteration + L"] cannot be created"; +} + +void OutputHelper::SetDefaultCSVFileNamePerIteration() +{ + m_csvFileNamePerIterationSummary = m_folderNamePerIteration + L"\\Summary.csv"; +} + +std::wstring OutputHelper::GetDefaultCSVFileNamePerIteration() { return m_csvFileNamePerIterationSummary; } + +std::wstring OutputHelper::GetCsvFileNamePerIterationResult() { return m_csvFileNamePerIterationResult; } + +void OutputHelper::SetDefaultCSVIterationResult(uint32_t iterationNum, const CommandLineArgs& args, + std::wstring& featureName) +{ + if (args.UseCPU() && args.UseGPU()) + { + if (!m_flagGpuDevice) + { + m_fileNameResultDevice = m_folderNamePerIteration + L"\\" + featureName + L"CpuIteration"; + if (iterationNum == args.NumIterations() - 1 || args.SaveTensorMode() == L"First") + { + m_flagGpuDevice = true; + } + } + else + { + m_fileNameResultDevice = m_folderNamePerIteration + L"\\" + featureName + L"GpuIteration"; + } + } + else if (args.UseGPU()) + { + m_fileNameResultDevice = m_folderNamePerIteration + L"\\" + featureName + L"GpuIteration"; + } + else + { + m_fileNameResultDevice = m_folderNamePerIteration 
+ L"\\" + featureName + L"CpuIteration"; + } + m_csvFileNamePerIterationResult = m_fileNameResultDevice + std::to_wstring(iterationNum + 1) + L".csv"; +} + +void OutputHelper::SetCSVFileName(const std::wstring& fileName) { m_csvFileName = fileName; } + +void OutputHelper::WritePerIterationPerformance(const CommandLineArgs& args, const std::wstring model, + const std::wstring imagePath) +{ + if (m_csvFileNamePerIterationSummary.length() > 0) + { + bool bNewFile = false; + std::ifstream fin; + fin.open(m_csvFileNamePerIterationSummary); + std::filebuf* outbuf = fin.rdbuf(); + if (EOF == outbuf->sbumpc()) + { + bNewFile = true; + } + fin.close(); + + std::ofstream fout; + fout.open(m_csvFileNamePerIterationSummary, std::ios_base::app); + + std::wstring_convert> converter; + std::string modelName = converter.to_bytes(model); + std::string fileNameResultDevice = converter.to_bytes(m_fileNameResultDevice); + std::string inputName = args.IsCSVInput() ? converter.to_bytes(args.CsvPath()) + : args.IsImageInput() ? 
converter.to_bytes(imagePath) : ""; + + if (bNewFile) + { + if (args.IsPerIterationCapture()) + { + fout << "Model Name" + << "," + << "Input Name" + << "," + << "Iterations" + << "," + << "Iteration Number " + << "," + << "CPU Working Set Diff (MB)" + << "," + << "CPU Working Set Start (MB)" + << "," + << "GPU Shared Memory Diff (MB)" + << "," + << "GPU Shared Memory Start (MB)" + << "," + << "GPU Dedicated Memory Diff (MB)" + << "," + << "Load (ms)" + << "," + << "Bind (ms)" + << "," + << "Evaluate (ms)" + << ","; + + if (args.IsSaveTensor()) + { + fout << "Result" + << "," + << "OutputTensorHash" + << "," + << "FileName"; + } + } + + else if (args.IsSaveTensor()) + { + fout << "Iteration Number" + << "," + << "Result" + << "," + << "OutputTensorHash" + << "," + << "FileName"; + } + fout << std::endl; + } + + if (args.IsPerIterationCapture()) + { + for (uint32_t i = 0; i < args.NumIterations(); i++) + { + fout << modelName << "," << inputName << "," << args.NumIterations() << "," << i + 1 << "," + << m_CPUWorkingDiff[i] << "," << m_CPUWorkingStart[i] << "," << m_GPUSharedDiff[i] << "," + << m_GPUSharedStart[i] << "," << m_GPUDedicatedDiff[i] << "," << m_clockLoadTimes[i] << "," + << m_clockBindTimes[i] << "," << m_clockEvalTimes[i] << ","; + + if (args.IsSaveTensor() && + (args.SaveTensorMode() == L"All" || (args.SaveTensorMode() == L"First" && i == 0))) + { + fout << m_outputResult[i] << "," << m_outputTensorHash[i] << "," + << fileNameResultDevice + std::to_string(i + 1) + ".csv" + << ","; + } + fout << std::endl; + } + } + else if (args.IsSaveTensor()) + { + for (uint32_t i = 0; i < args.NumIterations(); i++) + { + fout << i + 1 << "," << m_outputResult[i] << "," << m_outputTensorHash[i] << "," + << fileNameResultDevice + std::to_string(i + 1) + ".csv" << std::endl; + if (args.SaveTensorMode() == L"First" && i == 0) + { + break; + } + } + } + fout.close(); + } +} + +template +void OutputHelper::ProcessTensorResult(const CommandLineArgs& args, const void* 
buffer, const uint32_t uCapacity, + std::vector>& maxValues, std::ofstream& fout, unsigned int k) +{ + // Create a priority queue of size k that pops the lowest value first + // We will remove lowest values as we iterate over all the values + auto cmp = [](std::pair x, std::pair y) { return x.first > y.first; }; + std::priority_queue, std::vector>, decltype(cmp)> topKvalues(cmp); + + T* tensor = (T*)buffer; + int size = uCapacity / sizeof(T); + for (int i = 0; i < size; i++) + { + float val = 0; + if (!std::is_same::value) + { + val = *(tensor + i); + } + else + { + val = XMConvertHalfToFloat(static_cast(*(tensor + i))); + } + if (args.IsSaveTensor()) + { + fout << i << "," << val << std::endl; + } + + if (topKvalues.size() < k) + { + topKvalues.push({ val, i }); + } + else if (k > 0) + { + auto maxValue = topKvalues.top().first; + if (maxValue < val) + { + topKvalues.pop(); + topKvalues.push({ val, i }); + } + } + } + while (!topKvalues.empty()) + { + auto pair = topKvalues.top(); + maxValues.push_back(pair); + topKvalues.pop(); + } + // Put vector in order of highest value to lowest + std::reverse(maxValues.begin(), maxValues.end()); +} +template void OutputHelper::ProcessTensorResult(const CommandLineArgs& args, const void* buffer, const uint32_t uCapacity, + std::vector>& maxValues, std::ofstream& fout, + unsigned int k); +template void OutputHelper::ProcessTensorResult(const CommandLineArgs& args, const void* buffer, + const uint32_t uCapacity, + std::vector>& maxValues, + std::ofstream& fout, unsigned int k); + +void OutputHelper::WritePerformanceDataToCSV(const Profiler& profiler, int numIterations, + std::wstring model, const std::string& deviceType, const std::string& inputBinding, + const std::string& inputType, const std::string& deviceCreationLocation, + const std::vector>& perfFileMetadata) const +{ + double averageLoadTime = profiler[LOAD_MODEL].GetAverage(CounterType::TIMER); + double stdevLoadTime = profiler[LOAD_MODEL].GetStdev(CounterType::TIMER); 
+ double minLoadTime = profiler[LOAD_MODEL].GetMin(CounterType::TIMER); + double maxLoadTime = profiler[LOAD_MODEL].GetMax(CounterType::TIMER); + uint32_t numberLoadIterations = profiler[LOAD_MODEL].GetCount(); + + double averageCreateSessionTime = profiler[CREATE_SESSION].GetAverage(CounterType::TIMER); + double stdevCreateSessionTime = profiler[CREATE_SESSION].GetStdev(CounterType::TIMER); + double minCreateSessionTime = profiler[CREATE_SESSION].GetMin(CounterType::TIMER); + double maxCreateSessionTime = profiler[CREATE_SESSION].GetMax(CounterType::TIMER); + uint32_t numberCreateSessionIterations = profiler[CREATE_SESSION].GetCount(); + + double averageBindTime = profiler[BIND_VALUE].GetAverage(CounterType::TIMER); + double stdevBindTime = profiler[BIND_VALUE].GetStdev(CounterType::TIMER); + double minBindTime = profiler[BIND_VALUE].GetMin(CounterType::TIMER); + double maxBindTime = profiler[BIND_VALUE].GetMax(CounterType::TIMER); + + double averageFirstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::TIMER); + double stdevFirstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetStdev(CounterType::TIMER); + double minFirstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetMin(CounterType::TIMER); + double maxFirstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetMax(CounterType::TIMER); + + double averageEvalTime = profiler[EVAL_MODEL].GetAverage(CounterType::TIMER); + double stdevEvalTime = profiler[EVAL_MODEL].GetStdev(CounterType::TIMER); + double minEvalTime = profiler[EVAL_MODEL].GetMin(CounterType::TIMER); + double maxEvalTime = profiler[EVAL_MODEL].GetMax(CounterType::TIMER); + + double averageFirstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::TIMER); + double stdevFirstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetStdev(CounterType::TIMER); + double minFirstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetMin(CounterType::TIMER); + double maxFirstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetMax(CounterType::TIMER); + + double 
averageLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::WORKING_SET_USAGE); + double stdevLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetStdev(CounterType::WORKING_SET_USAGE); + double minLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetMin(CounterType::WORKING_SET_USAGE); + double maxLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetMax(CounterType::WORKING_SET_USAGE); + + double averageCreateSessionWorkingSetMemoryUsage = + profiler[CREATE_SESSION].GetAverage(CounterType::WORKING_SET_USAGE); + double stdevCreateSessionWorkingSetMemoryUsage = + profiler[CREATE_SESSION].GetStdev(CounterType::WORKING_SET_USAGE); + double minCreateSessionWorkingSetMemoryUsage = profiler[CREATE_SESSION].GetMin(CounterType::WORKING_SET_USAGE); + double maxCreateSessionWorkingSetMemoryUsage = profiler[CREATE_SESSION].GetMax(CounterType::WORKING_SET_USAGE); + + double averageBindWorkingSetMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::WORKING_SET_USAGE); + double stdevBindWorkingSetMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::WORKING_SET_USAGE); + double minBindWorkingSetMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::WORKING_SET_USAGE); + double maxBindWorkingSetMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::WORKING_SET_USAGE); + + double averageFirstBindWorkingSetMemoryUsage = + profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE); + double stdevFirstBindWorkingSetMemoryUsage = + profiler[BIND_VALUE_FIRST_RUN].GetStdev(CounterType::WORKING_SET_USAGE); + double minFirstBindWorkingSetMemoryUsage = + profiler[BIND_VALUE_FIRST_RUN].GetMin(CounterType::WORKING_SET_USAGE); + double maxFirstBindWorkingSetMemoryUsage = + profiler[BIND_VALUE_FIRST_RUN].GetMax(CounterType::WORKING_SET_USAGE); + + double averageEvalWorkingSetMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::WORKING_SET_USAGE); + double stdevEvalWorkingSetMemoryUsage = 
profiler[EVAL_MODEL].GetStdev(CounterType::WORKING_SET_USAGE); + double minEvalWorkingSetMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::WORKING_SET_USAGE); + double maxEvalWorkingSetMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::WORKING_SET_USAGE); + + double averageFirstEvalWorkingSetMemoryUsage = + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE); + double stdevFirstEvalWorkingSetMemoryUsage = + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE); + double minFirstEvalWorkingSetMemoryUsage = + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE); + double maxFirstEvalWorkingSetMemoryUsage = + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE); + + double averageLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); + double stdevLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE); + double minLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE); + double maxLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE); + + double averageCreateSessionDedicatedMemoryUsage = + profiler[CREATE_SESSION].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); + double stdevCreateSessionDedicatedMemoryUsage = + profiler[CREATE_SESSION].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE); + double minCreateSessionDedicatedMemoryUsage = + profiler[CREATE_SESSION].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE); + double maxCreateSessionDedicatedMemoryUsage = + profiler[CREATE_SESSION].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE); + + double averageBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); + double stdevBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE); + double minBindDedicatedMemoryUsage = 
profiler[BIND_VALUE].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE); + double maxBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE); + + double averageFirstBindDedicatedMemoryUsage = + profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); + double stdevFirstBindDedicatedMemoryUsage = + profiler[BIND_VALUE_FIRST_RUN].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE); + double minFirstBindDedicatedMemoryUsage = + profiler[BIND_VALUE_FIRST_RUN].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE); + double maxFirstBindDedicatedMemoryUsage = + profiler[BIND_VALUE_FIRST_RUN].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE); + + double averageEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); + double stdevEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE); + double minEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE); + double maxEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE); + + double averageFirstEvalDedicatedMemoryUsage = + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); + double stdevFirstEvalDedicatedMemoryUsage = + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); + double minFirstEvalDedicatedMemoryUsage = + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); + double maxFirstEvalDedicatedMemoryUsage = + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); + + double averageLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); + double stdevLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetStdev(CounterType::GPU_SHARED_MEM_USAGE); + double minLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetMin(CounterType::GPU_SHARED_MEM_USAGE); + double maxLoadSharedMemoryUsage = 
profiler[LOAD_MODEL].GetMax(CounterType::GPU_SHARED_MEM_USAGE); + + double averageCreateSessionSharedMemoryUsage = + profiler[CREATE_SESSION].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); + double stdevCreateSessionSharedMemoryUsage = + profiler[CREATE_SESSION].GetStdev(CounterType::GPU_SHARED_MEM_USAGE); + double minCreateSessionSharedMemoryUsage = profiler[CREATE_SESSION].GetMin(CounterType::GPU_SHARED_MEM_USAGE); + double maxCreateSessionSharedMemoryUsage = profiler[CREATE_SESSION].GetMax(CounterType::GPU_SHARED_MEM_USAGE); + + double averageBindSharedMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); + double stdevBindSharedMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::GPU_SHARED_MEM_USAGE); + double minBindSharedMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::GPU_SHARED_MEM_USAGE); + double maxBindSharedMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::GPU_SHARED_MEM_USAGE); + + double averageFirstBindSharedMemoryUsage = + profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); + double stdevFirstBindSharedMemoryUsage = + profiler[BIND_VALUE_FIRST_RUN].GetStdev(CounterType::GPU_SHARED_MEM_USAGE); + double minFirstBindSharedMemoryUsage = profiler[BIND_VALUE_FIRST_RUN].GetMin(CounterType::GPU_SHARED_MEM_USAGE); + double maxFirstBindSharedMemoryUsage = profiler[BIND_VALUE_FIRST_RUN].GetMax(CounterType::GPU_SHARED_MEM_USAGE); + + double averageEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); + double stdevEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::GPU_SHARED_MEM_USAGE); + double minEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::GPU_SHARED_MEM_USAGE); + double maxEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::GPU_SHARED_MEM_USAGE); + + double averageFirstEvalSharedMemoryUsage = + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); + double 
stdevFirstEvalSharedMemoryUsage = + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); + double minFirstEvalSharedMemoryUsage = + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); + double maxFirstEvalSharedMemoryUsage = + profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); + + if (!m_csvFileName.empty()) + { + // Check if header exists + bool bNewFile = false; + std::ifstream fin; + fin.open(m_csvFileName); + std::filebuf* outbuf = fin.rdbuf(); + if (EOF == outbuf->sbumpc()) + { + bNewFile = true; + } + fin.close(); + + std::ofstream fout; + fout.open(m_csvFileName, std::ios_base::app); + + std::wstring_convert> converter; + std::string modelName = converter.to_bytes(model); + + if (bNewFile) + { + fout << "model name" + << "," + << "device type" + << "," + << "input binding" + << "," + << "input type" + << "," + << "device creation location" + << "," + << "iterations" + << "," + << "load iterations" + << "," + << "session creation iterations" + << "," + << "average load (ms)" + << "," + << "standard deviation load (ms)" + << "," + << "min load (ms)" + << "," + << "max load (ms)" + << "," + << "average session creation (ms)" + << "," + << "standard deviation session creation (ms)" + << "," + << "min session creation (ms)" + << "," + << "max session creation (ms)" + << "," + << "average first bind (ms)" + << "," + << "standard deviation first bind (ms)" + << "," + << "min first bind (ms)" + << "," + << "max first bind (ms)" + << "," + << "average bind (ms)" + << "," + << "standard deviation bind (ms)" + << "," + << "min bind (ms)" + << "," + << "max bind (ms)" + << "," + << "average first evaluate (ms)" + << "," + << "standard deviation first evaluate (ms)" + << "," + << "min first evaluate (ms)" + << "," + << "max first evaluate (ms)" + << "," + << "average evaluate (ms)" + << "," + << "standard deviation evaluate (ms)" + << "," + << "min evaluate (ms)" + << "," + << "max evaluate 
(ms)" + << "," + << "load average working set memory (MB)" + << "," + << "load standard deviation working set memory (MB)" + << "," + << "load min working set memory (MB)" + << "," + << "load max working set memory (MB)" + << "," + << "session creation average working set memory (MB)" + << "," + << "session creation standard deviation working set memory (MB)" + << "," + << "session creation min working set memory (MB)" + << "," + << "session creation max working set memory (MB)" + << "," + << "first bind average working set memory (MB)" + << "," + << "first bind standard deviation working set memory (MB)" + << "," + << "first bind min working set memory (MB)" + << "," + << "first bind max working set memory (MB)" + << "," + << "bind average working set memory (MB)" + << "," + << "bind standard deviation working set memory (MB)" + << "," + << "bind min working set memory (MB)" + << "," + << "bind max working set memory (MB)" + << "," + << "first evaluate average working set memory (MB)" + << "," + << "first evaluate standard deviation working set memory (MB)" + << "," + << "first evaluate min working set memory (MB)" + << "," + << "first evaluate max working set memory (MB)" + << "," + << "evaluate average working set memory (MB)" + << "," + << "evaluate standard deviation working set memory (MB)" + << "," + << "evaluate min working set memory (MB)" + << "," + << "evaluate max working set memory (MB)" + << "," + << "load average dedicated memory (MB)" + << "," + << "load standard deviation dedicated memory (MB)" + << "," + << "load min dedicated memory (MB)" + << "," + << "load max dedicated memory (MB)" + << "," + << "session creation average dedicated memory (MB)" + << "," + << "session creation standard deviation dedicated memory (MB)" + << "," + << "session creation min dedicated memory (MB)" + << "," + << "session creation max dedicated memory (MB)" + << "," + << "first bind average dedicated memory (MB)" + << "," + << "first bind standard deviation dedicated 
memory (MB)" + << "," + << "first bind min dedicated memory (MB)" + << "," + << "first bind max dedicated memory (MB)" + << "," + << "bind average dedicated memory (MB)" + << "," + << "bind standard deviation dedicated memory (MB)" + << "," + << "bind min dedicated memory (MB)" + << "," + << "bind max dedicated memory (MB)" + << "," + << "first evaluate average dedicated memory (MB)" + << "," + << "first evaluate standard deviation dedicated memory (MB)" + << "," + << "first evaluate min dedicated memory (MB)" + << "," + << "first evaluate max dedicated memory (MB)" + << "," + << "evaluate average dedicated memory (MB)" + << "," + << "evaluate standard deviation dedicated memory (MB)" + << "," + << "evaluate min dedicated memory (MB)" + << "," + << "evaluate max dedicated memory (MB)" + << "," + << "load average shared memory (MB)" + << "," + << "load standard deviation shared memory (MB)" + << "," + << "load min shared memory (MB)" + << "," + << "load max shared memory (MB)" + << "," + << "session creation average shared memory (MB)" + << "," + << "session creation standard deviation shared memory (MB)" + << "," + << "session creation min shared memory (MB)" + << "," + << "session creation max shared memory (MB)" + << "," + << "first bind average shared memory (MB)" + << "," + << "first bind standard deviation shared memory (MB)" + << "," + << "first bind min shared memory (MB)" + << "," + << "first bind max shared memory (MB)" + << "," + << "bind average shared memory (MB)" + << "," + << "bind standard deviation shared memory (MB)" + << "," + << "bind min shared memory (MB)" + << "," + << "bind max shared memory (MB)" + << "," + << "first evaluate average shared memory (MB)" + << "," + << "first evaluate standard deviation shared memory (MB)" + << "," + << "first evaluate min shared memory (MB)" + << "," + << "first evaluate max shared memory (MB)" + << "," + << "evaluate average shared memory (MB)" + << "," + << "evaluate standard deviation shared memory (MB)" + 
<< "," + << "evaluate min shared memory (MB)" + << "," + << "evaluate max shared memory (MB)" + << ","; + for (auto metaDataPair : perfFileMetadata) + { + fout << metaDataPair.first << ","; + } + fout << std::endl; + } + fout << modelName << "," << deviceType << "," << inputBinding << "," << inputType << "," + << deviceCreationLocation << "," << numIterations << "," << numberLoadIterations << "," + << numberCreateSessionIterations << "," << averageLoadTime << "," << stdevLoadTime << "," + << minLoadTime << "," << maxLoadTime << "," << averageCreateSessionTime << "," + << stdevCreateSessionTime << "," << minCreateSessionTime << "," << maxCreateSessionTime << "," + << averageFirstBindTime << "," << stdevFirstBindTime << "," << minFirstBindTime << "," + << maxFirstBindTime << "," << (numIterations <= 1 ? 0 : averageBindTime) << "," + << (numIterations <= 1 ? 0 : stdevBindTime) << "," << (numIterations <= 1 ? 0 : minBindTime) << "," + << (numIterations <= 1 ? 0 : maxBindTime) << "," << averageFirstEvalTime << "," << stdevFirstEvalTime + << "," << minFirstEvalTime << "," << maxFirstEvalTime << "," + << (numIterations <= 1 ? 0 : averageEvalTime) << "," << (numIterations <= 1 ? 0 : stdevEvalTime) << "," + << (numIterations <= 1 ? 0 : minEvalTime) << "," << (numIterations <= 1 ? 0 : maxEvalTime) << "," + + << averageLoadWorkingSetMemoryUsage << "," << stdevLoadWorkingSetMemoryUsage << "," + << minLoadWorkingSetMemoryUsage << "," << maxLoadWorkingSetMemoryUsage << "," + << averageCreateSessionWorkingSetMemoryUsage << "," << stdevCreateSessionWorkingSetMemoryUsage << "," + << minCreateSessionWorkingSetMemoryUsage << "," << maxCreateSessionWorkingSetMemoryUsage << "," + << averageFirstBindWorkingSetMemoryUsage << "," << stdevFirstBindWorkingSetMemoryUsage << "," + << minFirstBindWorkingSetMemoryUsage << "," << maxFirstBindWorkingSetMemoryUsage << "," + << (numIterations <= 1 ? 0 : averageBindWorkingSetMemoryUsage) << "," + << (numIterations <= 1 ? 
0 : stdevBindWorkingSetMemoryUsage) << "," + << (numIterations <= 1 ? 0 : maxBindWorkingSetMemoryUsage) << "," + << (numIterations <= 1 ? 0 : minBindWorkingSetMemoryUsage) << "," + << averageFirstBindWorkingSetMemoryUsage << "," << stdevFirstBindWorkingSetMemoryUsage << "," + << minFirstBindWorkingSetMemoryUsage << "," << maxFirstBindWorkingSetMemoryUsage << "," + << (numIterations <= 1 ? 0 : averageEvalWorkingSetMemoryUsage) << "," + << (numIterations <= 1 ? 0 : stdevEvalWorkingSetMemoryUsage) << "," + << (numIterations <= 1 ? 0 : maxEvalWorkingSetMemoryUsage) << "," + << (numIterations <= 1 ? 0 : minEvalWorkingSetMemoryUsage) << "," + + << averageLoadDedicatedMemoryUsage << "," << stdevLoadDedicatedMemoryUsage << "," + << minLoadDedicatedMemoryUsage << "," << maxLoadDedicatedMemoryUsage << "," + << averageCreateSessionDedicatedMemoryUsage << "," << stdevCreateSessionDedicatedMemoryUsage << "," + << minCreateSessionDedicatedMemoryUsage << "," << maxCreateSessionDedicatedMemoryUsage << "," + << averageFirstBindDedicatedMemoryUsage << "," << stdevFirstBindDedicatedMemoryUsage << "," + << minFirstBindDedicatedMemoryUsage << "," << maxFirstBindDedicatedMemoryUsage << "," + << (numIterations <= 1 ? 0 : averageBindDedicatedMemoryUsage) << "," + << (numIterations <= 1 ? 0 : stdevBindDedicatedMemoryUsage) << "," + << (numIterations <= 1 ? 0 : maxBindDedicatedMemoryUsage) << "," + << (numIterations <= 1 ? 0 : minBindDedicatedMemoryUsage) << "," + << averageFirstBindDedicatedMemoryUsage << "," << stdevFirstBindDedicatedMemoryUsage << "," + << minFirstBindDedicatedMemoryUsage << "," << maxFirstBindDedicatedMemoryUsage << "," + << (numIterations <= 1 ? 0 : averageEvalDedicatedMemoryUsage) << "," + << (numIterations <= 1 ? 0 : stdevEvalDedicatedMemoryUsage) << "," + << (numIterations <= 1 ? 0 : maxEvalDedicatedMemoryUsage) << "," + << (numIterations <= 1 ? 
0 : minEvalDedicatedMemoryUsage) << "," + + << averageLoadSharedMemoryUsage << "," << stdevLoadSharedMemoryUsage << "," << minLoadSharedMemoryUsage + << "," << maxLoadSharedMemoryUsage << "," << averageCreateSessionSharedMemoryUsage << "," + << stdevCreateSessionSharedMemoryUsage << "," << minCreateSessionSharedMemoryUsage << "," + << maxCreateSessionSharedMemoryUsage << "," << averageFirstBindSharedMemoryUsage << "," + << stdevFirstBindSharedMemoryUsage << "," << minFirstBindSharedMemoryUsage << "," + << maxFirstBindSharedMemoryUsage << "," << (numIterations <= 1 ? 0 : averageBindSharedMemoryUsage) + << "," << (numIterations <= 1 ? 0 : stdevBindSharedMemoryUsage) << "," + << (numIterations <= 1 ? 0 : maxBindSharedMemoryUsage) << "," + << (numIterations <= 1 ? 0 : minBindSharedMemoryUsage) << "," << averageFirstBindSharedMemoryUsage + << "," << stdevFirstBindSharedMemoryUsage << "," << minFirstBindSharedMemoryUsage << "," + << maxFirstBindSharedMemoryUsage << "," << (numIterations <= 1 ? 0 : averageEvalSharedMemoryUsage) + << "," << (numIterations <= 1 ? 0 : stdevEvalSharedMemoryUsage) << "," + << (numIterations <= 1 ? 0 : maxEvalSharedMemoryUsage) << "," + << (numIterations <= 1 ? 
0 : minEvalSharedMemoryUsage) << ","; + for (auto metaDataPair : perfFileMetadata) + { + fout << metaDataPair.second << ","; + } + fout << std::endl; + fout.close(); + } +} diff --git a/Tools/WinMLRunner/src/OutputHelper.h b/Tools/WinMLRunner/src/OutputHelper.h index 13b666de..7489e5eb 100644 --- a/Tools/WinMLRunner/src/OutputHelper.h +++ b/Tools/WinMLRunner/src/OutputHelper.h @@ -1,49 +1,9 @@ -#pragma once -#include "Common.h" -#include "CommandLineArgs.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - #if defined(_AMD64_) // PIX markers only work on amd64 #include #endif - -using namespace winrt::Windows::AI::MachineLearning; -using namespace winrt::Windows::Storage::Streams; -using namespace ::Windows::Graphics::DirectX::Direct3D11; -using namespace winrt::Windows::Graphics::DirectX::Direct3D11; -using namespace DirectX::PackedVector; - -inline size_t hash_data(void const* ptr, size_t const bytes) noexcept -{ -#ifdef _WIN64 - constexpr size_t fnv_offset_basis = 14695981039346656037ULL; - constexpr size_t fnv_prime = 1099511628211ULL; -#else - constexpr size_t fnv_offset_basis = 2166136261U; - constexpr size_t fnv_prime = 16777619U; -#endif - size_t result = fnv_offset_basis; - uint8_t const* const buffer = static_cast(ptr); - - for (size_t next = 0; next < bytes; ++next) - { - result ^= buffer[next]; - result *= fnv_prime; - } - - return result; -} - +#include "TimerHelper.h" +#include "LearningModelDeviceHelper.h" // Stores performance information and handles output to the command line and CSV files. 
class OutputHelper { @@ -62,1162 +22,50 @@ class OutputHelper m_outputTensorHash.resize(numIterations, 0); } - void PrintLoadingInfo(const std::wstring& modelPath) const - { - wprintf(L"Loading model (path = %s)...\n", modelPath.c_str()); - } - + void PrintLoadingInfo(const std::wstring& modelPath) const; void PrintBindingInfo(uint32_t iteration, DeviceType deviceType, InputBindingType inputBindingType, InputDataType inputDataType, DeviceCreationLocation deviceCreationLocation, - const std::string& status) const - { - printf("Binding (device = %s, iteration = %d, inputBinding = %s, inputDataType = %s, deviceCreationLocation = " - "%s)...%s\n", - TypeHelper::Stringify(deviceType).c_str(), iteration, TypeHelper::Stringify(inputBindingType).c_str(), - TypeHelper::Stringify(inputDataType).c_str(), TypeHelper::Stringify(deviceCreationLocation).c_str(), - status.c_str()); - } - + const std::string& status) const; void PrintEvaluatingInfo(uint32_t iteration, DeviceType deviceType, InputBindingType inputBindingType, InputDataType inputDataType, DeviceCreationLocation deviceCreationLocation, - const std::string& status) const - { - printf("Evaluating (device = %s, iteration = %d, inputBinding = %s, inputDataType = %s, deviceCreationLocation " - "= %s)...%s\n", - TypeHelper::Stringify(deviceType).c_str(), iteration, TypeHelper::Stringify(inputBindingType).c_str(), - TypeHelper::Stringify(inputDataType).c_str(), TypeHelper::Stringify(deviceCreationLocation).c_str(), - status.c_str()); - } - - void PrintModelInfo(std::wstring modelPath, LearningModel model) const - { - std::cout << "=================================================================" << std::endl; - std::wcout << "Name: " << model.Name().c_str() << std::endl; - std::wcout << "Author: " << model.Author().c_str() << std::endl; - std::wcout << "Version: " << model.Version() << std::endl; - std::wcout << "Domain: " << model.Domain().c_str() << std::endl; - std::wcout << "Description: " << model.Description().c_str() 
<< std::endl; - std::wcout << "Path: " << modelPath << std::endl; - std::cout << "Support FP16: " << std::boolalpha << doesModelContainFP16(model) << std::endl; - - std::cout << std::endl; - // print out information about input of model - std::cout << "Input Feature Info:" << std::endl; - for (auto&& inputFeature : model.InputFeatures()) - { - PrintFeatureDescriptorInfo(inputFeature); - } - // print out information about output of model - std::cout << "Output Feature Info:" << std::endl; - for (auto&& outputFeature : model.OutputFeatures()) - { - PrintFeatureDescriptorInfo(outputFeature); - } - std::cout << "=================================================================" << std::endl; - std::cout << std::endl; - } - - void PrintFeatureDescriptorInfo(const ILearningModelFeatureDescriptor& descriptor) const - { - // IMPORTANT: This learningModelFeatureKind array needs to match the "enum class - // LearningModelFeatureKind" idl in Windows.AI.MachineLearning.0.h - const std::string learningModelFeatureKind[] = { - "Tensor", - "Sequence", - "Map", - "Image", - }; - std::wstring name(descriptor.Name()); - std::wcout << "Name: " << name << std::endl; - std::wcout << "Feature Kind: " << FeatureDescriptorToString(descriptor) << std::endl; - std::cout << std::endl; - } - - void PrintHardwareInfo() const - { - std::cout << "WinML Runner" << std::endl; - std::cout << "Printing available GPUs with DXGI.." 
<< std::endl; - com_ptr factory; - CreateDXGIFactory1(__uuidof(IDXGIFactory6), factory.put_void()); - std::vector> validAdapters; - for (UINT i = 0;; ++i) - { - com_ptr spAdapter; - if (factory->EnumAdapters1(i, spAdapter.put()) != S_OK) - { - break; - } - DXGI_ADAPTER_DESC1 pDesc; - spAdapter->GetDesc1(&pDesc); - - // is a software adapter - if (pDesc.Flags == DXGI_ADAPTER_FLAG_SOFTWARE || (pDesc.VendorId == 0x1414 && pDesc.DeviceId == 0x8c)) - { - continue; - } - // valid GPU adapter - else - { - printf("Index: %d, Description: %ls\n", static_cast(validAdapters.size()), pDesc.Description); - validAdapters.push_back(spAdapter); - } - } - std::cout << std::endl; - } - - static void PrintLearningModelDevice(const LearningModelDeviceWithMetadata& device) - { - if (device.DeviceType == DeviceType::CPU) - { - std::cout << "\nCreated LearningModelDevice with CPU device" << std::endl; - return; - } - - IDirect3DDevice d3dDevice = device.LearningModelDevice.Direct3D11Device(); - com_ptr dxgi; - dxgi = d3dDevice.try_as(); - if (dxgi) - { - com_ptr dxgiDevice; - dxgi->GetInterface(__uuidof(IDXGIDevice), dxgiDevice.put_void()); - com_ptr adapter; - dxgiDevice->GetAdapter(adapter.put()); - DXGI_ADAPTER_DESC description; - if (SUCCEEDED(adapter->GetDesc(&description))) - { - std::wcout << L"\nCreated LearningModelDevice with GPU: " << description.Description << std::endl; - } - } - else - { - std::cout << "Failed to Print Learning Model Device Information" << std::endl; - } - } - + const std::string& status) const; + void PrintModelInfo(const std::wstring& modelPath, const LearningModel& model) const; + void PrintFeatureDescriptorInfo(const ILearningModelFeatureDescriptor& descriptor) const; + void PrintHardwareInfo() const; void PrintResults(const Profiler& profiler, uint32_t numIterations, DeviceType deviceType, InputBindingType inputBindingType, InputDataType inputDataType, - DeviceCreationLocation deviceCreationLocation, bool isPerformanceConsoleOutputVerbose) const - { - 
double loadTime = profiler[LOAD_MODEL].GetAverage(CounterType::TIMER); - double createSessionTime = profiler[CREATE_SESSION].GetAverage(CounterType::TIMER); - - double averageBindTime = profiler[BIND_VALUE].GetAverage(CounterType::TIMER); - double stdevBindTime = profiler[BIND_VALUE].GetStdev(CounterType::TIMER); - double minBindTime = profiler[BIND_VALUE].GetMin(CounterType::TIMER); - double maxBindTime = profiler[BIND_VALUE].GetMax(CounterType::TIMER); - double firstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::TIMER); - - double averageEvalTime = profiler[EVAL_MODEL].GetAverage(CounterType::TIMER); - double stdevEvalTime = profiler[EVAL_MODEL].GetStdev(CounterType::TIMER); - double minEvalTime = profiler[EVAL_MODEL].GetMin(CounterType::TIMER); - double maxEvalTime = profiler[EVAL_MODEL].GetMax(CounterType::TIMER); - double firstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::TIMER); - - double firstLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::WORKING_SET_USAGE); - double firstLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); - double firstLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); - double firstLoadPeakWorkingSetUsage = profiler[LOAD_MODEL].GetAverage(CounterType::PEAK_WORKING_SET_USAGE); - - double firstSessionCreationWorkingSetMemoryUsage = - profiler[CREATE_SESSION].GetAverage(CounterType::WORKING_SET_USAGE); - double firstSessionCreationSharedMemoryUsage = - profiler[CREATE_SESSION].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); - double firstSessionCreationDedicatedMemoryUsage = - profiler[CREATE_SESSION].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); - double firstSessionPeakWorkingSetUsage = - profiler[CREATE_SESSION].GetAverage(CounterType::PEAK_WORKING_SET_USAGE); - - double averageBindMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::WORKING_SET_USAGE); - double 
stdevBindMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::WORKING_SET_USAGE); - double minBindMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::WORKING_SET_USAGE); - double maxBindMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::WORKING_SET_USAGE); - double firstBindMemoryUsage = profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE); - double firstBindPeakMemoryUsage = - profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::PEAK_WORKING_SET_USAGE); - - double averageEvalMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::WORKING_SET_USAGE); - double stdevEvalMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::WORKING_SET_USAGE); - double minEvalMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::WORKING_SET_USAGE); - double maxEvalMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::WORKING_SET_USAGE); - double firstEvalMemoryUsage = profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE); - double firstEvalPeakMemoryUsage = - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::PEAK_WORKING_SET_USAGE); - - double averageBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); - double stdevBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE); - double minBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE); - double maxBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE); - double firstBindDedicatedMemoryUsage = - profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); - - double averageEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); - double stdevEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE); - double minEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE); - double 
maxEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE); - double firstEvalDedicatedMemoryUsage = - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); - - double averageBindSharedMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); - double stdevBindSharedMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::GPU_SHARED_MEM_USAGE); - double minBindSharedMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::GPU_SHARED_MEM_USAGE); - double maxBindSharedMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::GPU_SHARED_MEM_USAGE); - double firstBindSharedMemoryUsage = - profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); - - double averageEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); - double stdevEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::GPU_SHARED_MEM_USAGE); - double minEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::GPU_SHARED_MEM_USAGE); - double maxEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::GPU_SHARED_MEM_USAGE); - double firstEvalSharedMemoryUsage = - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); - - double firstIterationWorkingSetMemoryUsage = - profiler[LOAD_MODEL].GetAverage(CounterType::WORKING_SET_USAGE) + - profiler[CREATE_SESSION].GetAverage(CounterType::WORKING_SET_USAGE) + - profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE) + - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE); - - double firstIterationSharedMemoryUsage = - profiler[LOAD_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE) + - profiler[CREATE_SESSION].GetAverage(CounterType::GPU_SHARED_MEM_USAGE) + - profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE) + - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); - - double 
firstIterationDedicatedMemoryUsage = - profiler[LOAD_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE) + - profiler[CREATE_SESSION].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE) + - profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE) + - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); - - double firstIterationPeakWorkingSet = firstLoadPeakWorkingSetUsage + firstSessionPeakWorkingSetUsage + - firstBindPeakMemoryUsage + firstEvalPeakMemoryUsage; - - printf("\nResults (device = %s, numIterations = %d, inputBinding = %s, inputDataType = %s, " - "deviceCreationLocation = %s):\n", - TypeHelper::Stringify(deviceType).c_str(), numIterations, - TypeHelper::Stringify(inputBindingType).c_str(), TypeHelper::Stringify(inputDataType).c_str(), - TypeHelper::Stringify(deviceCreationLocation).c_str()); - - std::cout << "\nFirst Iteration Performance (load, bind, session creation, and evaluate): " << std::endl; - std::cout << " Load: " << loadTime << " ms" << std::endl; - std::cout << " Bind: " << firstBindTime << " ms" << std::endl; - std::cout << " Session Creation: " << createSessionTime << " ms" << std::endl; - std::cout << " Evaluate: " << firstEvalTime << " ms" << std::endl; - - if (isPerformanceConsoleOutputVerbose) - { - std::cout << "\n Working Set Memory usage (load): " << firstLoadWorkingSetMemoryUsage << " MB" - << std::endl; - std::cout << " Working Set Memory usage (session creation): " << firstSessionCreationWorkingSetMemoryUsage - << " MB" << std::endl; - std::cout << " Working Set Memory usage (bind): " << firstBindMemoryUsage << " MB" << std::endl; - } - else - { - std::cout << std::endl; - } - std::cout << " Working Set Memory usage (evaluate): " << firstEvalMemoryUsage << " MB" << std::endl; - std::cout << " Working Set Memory usage (load, bind, session creation, and evaluate): " - << firstIterationWorkingSetMemoryUsage << " MB" << std::endl; - - if (isPerformanceConsoleOutputVerbose) - { - 
std::cout << std::endl; - std::cout << " Peak Working Set Memory Difference (from start to load): " << firstLoadPeakWorkingSetUsage - << " MB" << std::endl; - std::cout << " Peak Working Set Memory Difference (from model load to session creation): " - << firstSessionPeakWorkingSetUsage << " MB" << std::endl; - std::cout << " Peak Working Set Memory Difference (from session to bind): " << firstBindPeakMemoryUsage - << " MB" << std::endl; - std::cout << " Peak Working Set Memory Difference (from bind to evaluate): " << firstEvalPeakMemoryUsage - << " MB" << std::endl; - } - - std::cout << " Peak Working Set Memory Difference (load, bind, session creation, and evaluate): " - << firstIterationPeakWorkingSet << " MB" << std::endl; - - if (isPerformanceConsoleOutputVerbose) - { - std::cout << "\n Dedicated Memory usage (load): " << firstLoadDedicatedMemoryUsage << " MB" << std::endl; - std::cout << " Dedicated Memory usage (session creation): " << firstSessionCreationDedicatedMemoryUsage - << " MB" << std::endl; - std::cout << " Dedicated Memory usage (bind): " << firstBindDedicatedMemoryUsage << " MB" << std::endl; - } - else - { - std::cout << std::endl; - } - std::cout << " Dedicated Memory usage (evaluate): " << firstEvalDedicatedMemoryUsage << " MB" << std::endl; - std::cout << " Dedicated Memory usage (load, bind, session creation, and evaluate): " - << firstIterationDedicatedMemoryUsage << " MB" << std::endl; - - if (isPerformanceConsoleOutputVerbose) - { - std::cout << "\n Shared Memory usage (load): " << firstLoadSharedMemoryUsage << " MB" << std::endl; - std::cout << " Shared Memory usage (session creation): " << firstSessionCreationSharedMemoryUsage << " MB" - << std::endl; - std::cout << " Shared Memory usage (bind): " << firstBindSharedMemoryUsage << " MB" << std::endl; - } - else - { - std::cout << std::endl; - } - std::cout << " Shared Memory usage (evaluate): " << firstEvalSharedMemoryUsage << " MB" << std::endl; - std::cout << " Shared Memory usage 
(load, bind, session creation, and evaluate): " - << firstIterationSharedMemoryUsage << " MB" << std::endl; - - if (numIterations > 1) - { - printf("\nAverage Performance excluding first iteration. Iterations %d to %d. (Iterations greater than 1 " - "only bind and evaluate)\n", - 2, numIterations); - std::cout << " Average Bind: " << averageBindTime << " ms" << std::endl; - if (isPerformanceConsoleOutputVerbose) - { - std::cout << " Minimum Bind: " << minBindTime << " ms" << std::endl; - std::cout << " Maximum Bind: " << maxBindTime << " ms" << std::endl; - std::cout << " Standard Deviation Bind: " << stdevBindTime << " ms" << std::endl; - } - std::cout << " Average Evaluate: " << averageEvalTime << " ms" << std::endl; - if (isPerformanceConsoleOutputVerbose) - { - std::cout << " Minimum Evaluate: " << minEvalTime << " ms" << std::endl; - std::cout << " Maximum Evaluate: " << maxEvalTime << " ms" << std::endl; - std::cout << " Standard Deviation Evaluate: " << stdevEvalTime << " ms" << std::endl; - } - - std::cout << "\n Average Working Set Memory usage (bind): " << averageBindMemoryUsage << " MB" - << std::endl; - if (isPerformanceConsoleOutputVerbose) - { - std::cout << " Min Working Set Memory usage (bind): " << minBindMemoryUsage << " MB" << std::endl; - std::cout << " Max Working Set Memory usage (bind): " << maxBindMemoryUsage << " MB" << std::endl; - std::cout << " Standard Deviation Working Set Memory usage (bind): " << stdevBindMemoryUsage << " MB" - << std::endl; - } - std::cout << " Average Working Set Memory usage (evaluate): " << averageEvalMemoryUsage << " MB" - << std::endl; - if (isPerformanceConsoleOutputVerbose) - { - std::cout << " Min Working Set Memory usage (evaluate): " << minEvalMemoryUsage << " MB" << std::endl; - std::cout << " Max Working Set Memory usage (evaluate): " << maxEvalMemoryUsage << " MB" << std::endl; - std::cout << " Standard Deviation Working Set Memory usage (evaluate): " << stdevEvalMemoryUsage - << " MB" << std::endl; - } 
- - std::cout << "\n Average Dedicated Memory usage (bind): " << averageBindDedicatedMemoryUsage << " MB" - << std::endl; - if (isPerformanceConsoleOutputVerbose) - { - std::cout << " Min Dedicated Memory usage (bind): " << minBindDedicatedMemoryUsage << " MB" - << std::endl; - std::cout << " Max Dedicated Memory usage (bind): " << maxBindDedicatedMemoryUsage << " MB" - << std::endl; - std::cout << " Standard Deviation Working Set Memory usage (evaluate): " - << stdevBindDedicatedMemoryUsage << " MB" << std::endl; - } - std::cout << " Average Dedicated Memory usage (evaluate): " << averageEvalDedicatedMemoryUsage << " MB" - << std::endl; - if (isPerformanceConsoleOutputVerbose) - { - std::cout << " Min Dedicated Memory usage (evaluate): " << minEvalDedicatedMemoryUsage << " MB" - << std::endl; - std::cout << " Max Dedicated Memory usage (evaluate): " << maxEvalDedicatedMemoryUsage << " MB" - << std::endl; - std::cout << " Standard Deviation Dedicated Memory usage (evaluate): " << stdevEvalDedicatedMemoryUsage - << " MB" << std::endl; - } - - std::cout << "\n Average Shared Memory usage (bind): " << averageBindSharedMemoryUsage << " MB" - << std::endl; - if (isPerformanceConsoleOutputVerbose) - { - std::cout << " Min Shared Memory usage (bind): " << minBindSharedMemoryUsage << " MB" << std::endl; - std::cout << " Max Shared Memory usage (bind): " << maxBindSharedMemoryUsage << " MB" << std::endl; - std::cout << " Standard Deviation Shared Memory usage (bind): " << stdevBindSharedMemoryUsage << " MB" - << std::endl; - } - std::cout << " Average Shared Memory usage (evaluate): " << averageEvalSharedMemoryUsage << " MB" - << std::endl; - if (isPerformanceConsoleOutputVerbose) - { - std::cout << " Min Shared Memory usage (evaluate): " << minEvalSharedMemoryUsage << " MB" << std::endl; - std::cout << " Max Shared Memory usage (evaluate): " << maxEvalSharedMemoryUsage << " MB" << std::endl; - std::cout << " Standard Deviation Shared Memory usage (evaluate): " << 
stdevEvalSharedMemoryUsage - << " MB" << std::endl; - } - } - std::cout << std::endl << std::endl << std::endl; - } - - static std::wstring FeatureDescriptorToString(const ILearningModelFeatureDescriptor& descriptor) - { - switch (descriptor.Kind()) - { - case LearningModelFeatureKind::Tensor: - { - auto tensorDescriptor = descriptor.as(); - return TypeHelper::Stringify(tensorDescriptor.TensorKind()); - } - case LearningModelFeatureKind::Image: - { - auto imageDescriptor = descriptor.as(); - std::wstring str = L"Image (Height: " + std::to_wstring(imageDescriptor.Height()) + L", Width: " + - std::to_wstring(imageDescriptor.Width()) + L")"; - return str; - } - case LearningModelFeatureKind::Map: - { - auto mapDescriptor = descriptor.as(); - std::wstring str = L"Map<" + TypeHelper::Stringify(mapDescriptor.KeyKind()) + L","; - str += FeatureDescriptorToString(mapDescriptor.ValueDescriptor()); - str += L">"; - return str; - } - case LearningModelFeatureKind::Sequence: - { - auto sequenceDescriptor = descriptor.as(); - std::wstring str = L"List<" + FeatureDescriptorToString(sequenceDescriptor.ElementDescriptor()) + L">"; - return str; - } - default: - return (L"Invalid feature %s.", descriptor.Name().c_str()); - } - } - - static bool doesDescriptorContainFP16(const ILearningModelFeatureDescriptor& descriptor) - { - switch (descriptor.Kind()) - { - case LearningModelFeatureKind::Tensor: - { - return descriptor.as().TensorKind() == TensorKind::Float16; - } - break; - case LearningModelFeatureKind::Map: - { - auto mapDescriptor = descriptor.as(); - if (mapDescriptor.KeyKind() == TensorKind::Float16) - { - return true; - } - return doesDescriptorContainFP16(mapDescriptor.ValueDescriptor()); - } - break; - case LearningModelFeatureKind::Sequence: - { - return doesDescriptorContainFP16(descriptor.as().ElementDescriptor()); - } - break; - default: - { - return false; - } - } - } - - static bool doesModelContainFP16(const LearningModel model) - { - for (auto&& inputFeature : 
model.InputFeatures()) - { - if (doesDescriptorContainFP16(inputFeature)) - { - return true; - } - } - return false; - } - - void SaveLoadTimes(Profiler& profiler, uint32_t iterNum) - { - m_clockLoadTimes[iterNum] = profiler[LOAD_MODEL].GetClockTime(); - } - void SaveBindTimes(Profiler& profiler, uint32_t iterNum) - { - m_clockBindTimes[iterNum] = - (iterNum == 0) ? profiler[BIND_VALUE_FIRST_RUN].GetClockTime() : profiler[BIND_VALUE].GetClockTime(); - } - void SaveEvalPerformance(Profiler& profiler, uint32_t iterNum) - { - enum WINML_MODEL_TEST_PERF eval = (iterNum == 0) ? EVAL_MODEL_FIRST_RUN : EVAL_MODEL; - m_clockEvalTimes[iterNum] = profiler[eval].GetClockTime(); - m_CPUWorkingDiff[iterNum] = profiler[eval].GetCpuWorkingDiff(); - m_CPUWorkingStart[iterNum] = profiler[eval].GetCpuWorkingStart(); - m_GPUSharedDiff[iterNum] = profiler[eval].GetGpuSharedDiff(); - m_GPUSharedStart[iterNum] = profiler[eval].GetGpuSharedStart(); - m_GPUDedicatedDiff[iterNum] = profiler[eval].GetGpuDedicatedDiff(); - } - - void SaveResult(uint32_t iterationNum, std::string result, int hashcode) - { - m_outputResult[iterationNum] = result; - m_outputTensorHash[iterationNum] = hashcode; - } - - void SetDefaultPerIterationFolder(const std::wstring& folderName) - { - m_folderNamePerIteration = folderName; - if (std::filesystem::create_directories(m_folderNamePerIteration.c_str()) != 0) - std::wcout << L"Folder [" + m_folderNamePerIteration + L"] cannot be created"; - } - - void SetDefaultCSVFileNamePerIteration() - { - m_csvFileNamePerIterationSummary = m_folderNamePerIteration + L"\\Summary.csv"; - } - - void SetDefaultCSVIterationResult(uint32_t iterationNum, const CommandLineArgs& args, std::wstring& featureName) - { - if (args.UseCPU() && args.UseGPU()) - { - if (!m_flagGpuDevice) - { - m_fileNameResultDevice = m_folderNamePerIteration + L"\\" + featureName + L"CpuIteration"; - if (iterationNum == args.NumIterations() - 1 || args.SaveTensorMode() == L"First") - { - m_flagGpuDevice = 
true; - } - } - else - { - m_fileNameResultDevice = m_folderNamePerIteration + L"\\" + featureName + L"GpuIteration"; - } - } - else if (args.UseGPU()) - { - m_fileNameResultDevice = m_folderNamePerIteration + L"\\" + featureName + L"GpuIteration"; - } - else - { - m_fileNameResultDevice = m_folderNamePerIteration + L"\\" + featureName + L"CpuIteration"; - } - m_csvFileNamePerIterationResult = m_fileNameResultDevice + std::to_wstring(iterationNum + 1) + L".csv"; - } - - void SetCSVFileName(const std::wstring& fileName) { m_csvFileName = fileName; } - + DeviceCreationLocation deviceCreationLocation, bool isPerformanceConsoleOutputVerbose) const; + void SaveLoadTimes(Profiler& profiler, uint32_t iterNum); + void SaveBindTimes(Profiler& profiler, uint32_t iterNum); + void SaveEvalPerformance(Profiler& profiler, uint32_t iterNum); + void SaveResult(uint32_t iterationNum, std::string result, int hashcode); + void SetDefaultPerIterationFolder(const std::wstring& folderName); + void SetDefaultCSVFileNamePerIteration(); + std::wstring GetDefaultCSVFileNamePerIteration(); + std::wstring GetCsvFileNamePerIterationResult(); + void SetDefaultCSVIterationResult(uint32_t iterationNum, const CommandLineArgs& args, std::wstring& featureName); + void SetCSVFileName(const std::wstring& fileName); void WritePerIterationPerformance(const CommandLineArgs& args, const std::wstring model, - const std::wstring imagePath) - { - if (m_csvFileNamePerIterationSummary.length() > 0) - { - bool bNewFile = false; - std::ifstream fin; - fin.open(m_csvFileNamePerIterationSummary); - std::filebuf* outbuf = fin.rdbuf(); - if (EOF == outbuf->sbumpc()) - { - bNewFile = true; - } - fin.close(); - - std::ofstream fout; - fout.open(m_csvFileNamePerIterationSummary, std::ios_base::app); - - std::wstring_convert> converter; - std::string modelName = converter.to_bytes(model); - std::string fileNameResultDevice = converter.to_bytes(m_fileNameResultDevice); - std::string inputName = args.IsCSVInput() ? 
converter.to_bytes(args.CsvPath()) - : args.IsImageInput() ? converter.to_bytes(imagePath) : ""; - - if (bNewFile) - { - if (args.IsPerIterationCapture()) - { - fout << "Model Name" - << "," - << "Input Name" - << "," - << "Iterations" - << "," - << "Iteration Number " - << "," - << "CPU Working Set Diff (MB)" - << "," - << "CPU Working Set Start (MB)" - << "," - << "GPU Shared Memory Diff (MB)" - << "," - << "GPU Shared Memory Start (MB)" - << "," - << "GPU Dedicated Memory Diff (MB)" - << "," - << "Load (ms)" - << "," - << "Bind (ms)" - << "," - << "Evaluate (ms)" - << ","; - - if (args.IsSaveTensor()) - { - fout << "Result" - << "," - << "OutputTensorHash" - << "," - << "FileName"; - } - } - - else if (args.IsSaveTensor()) - { - fout << "Iteration Number" - << "," - << "Result" - << "," - << "OutputTensorHash" - << "," - << "FileName"; - } - fout << std::endl; - } - - if (args.IsPerIterationCapture()) - { - for (uint32_t i = 0; i < args.NumIterations(); i++) - { - fout << modelName << "," << inputName << "," << args.NumIterations() << "," << i + 1 << "," - << m_CPUWorkingDiff[i] << "," << m_CPUWorkingStart[i] << "," << m_GPUSharedDiff[i] << "," - << m_GPUSharedStart[i] << "," << m_GPUDedicatedDiff[i] << "," << m_clockLoadTimes[i] << "," - << m_clockBindTimes[i] << "," << m_clockEvalTimes[i] << ","; - - if (args.IsSaveTensor() && - (args.SaveTensorMode() == L"All" || (args.SaveTensorMode() == L"First" && i == 0))) - { - fout << m_outputResult[i] << "," << m_outputTensorHash[i] << "," - << fileNameResultDevice + std::to_string(i + 1) + ".csv" - << ","; - } - fout << std::endl; - } - } - else if (args.IsSaveTensor()) - { - for (uint32_t i = 0; i < args.NumIterations(); i++) - { - fout << i + 1 << "," << m_outputResult[i] << "," << m_outputTensorHash[i] << "," - << fileNameResultDevice + std::to_string(i + 1) + ".csv" << std::endl; - if (args.SaveTensorMode() == L"First" && i == 0) - { - break; - } - } - } - fout.close(); - } - } - - template - void 
ProcessTensorResult(const CommandLineArgs& args, const void* buffer, const uint32_t uCapacity, - std::vector>& maxValues, std::ofstream& fout, unsigned int k) - { - // Create a priority queue of size k that pops the lowest value first - // We will remove lowest values as we iterate over all the values - auto cmp = [](std::pair x, std::pair y) { return x.first > y.first; }; - std::priority_queue, std::vector>, decltype(cmp)> topKvalues(cmp); - - T* tensor = (T*)buffer; - int size = uCapacity / sizeof(T); - for (int i = 0; i < size; i++) - { - float val = 0; - if (!std::is_same::value) - { - val = *(tensor + i); - } - else - { - val = XMConvertHalfToFloat(static_cast(*(tensor + i))); - } - if (args.IsSaveTensor()) - { - fout << i << "," << val << std::endl; - } - - if (topKvalues.size() < k) - { - topKvalues.push({ val, i }); - } - else if (k > 0) - { - auto maxValue = topKvalues.top().first; - if (maxValue < val) - { - topKvalues.pop(); - topKvalues.push({ val, i }); - } - } - } - while (!topKvalues.empty()) - { - auto pair = topKvalues.top(); - maxValues.push_back(pair); - topKvalues.pop(); - } - // Put vector in order of highest value to lowest - std::reverse(maxValues.begin(), maxValues.end()); - } - + const std::wstring imagePath); void WritePerformanceDataToCSV(const Profiler& profiler, int numIterations, std::wstring model, const std::string& deviceType, const std::string& inputBinding, const std::string& inputType, const std::string& deviceCreationLocation, - const std::vector>& perfFileMetadata) const - { - double averageLoadTime = profiler[LOAD_MODEL].GetAverage(CounterType::TIMER); - double stdevLoadTime = profiler[LOAD_MODEL].GetStdev(CounterType::TIMER); - double minLoadTime = profiler[LOAD_MODEL].GetMin(CounterType::TIMER); - double maxLoadTime = profiler[LOAD_MODEL].GetMax(CounterType::TIMER); - uint32_t numberLoadIterations = profiler[LOAD_MODEL].GetCount(); - - double averageCreateSessionTime = profiler[CREATE_SESSION].GetAverage(CounterType::TIMER); 
- double stdevCreateSessionTime = profiler[CREATE_SESSION].GetStdev(CounterType::TIMER); - double minCreateSessionTime = profiler[CREATE_SESSION].GetMin(CounterType::TIMER); - double maxCreateSessionTime = profiler[CREATE_SESSION].GetMax(CounterType::TIMER); - uint32_t numberCreateSessionIterations = profiler[CREATE_SESSION].GetCount(); - - double averageBindTime = profiler[BIND_VALUE].GetAverage(CounterType::TIMER); - double stdevBindTime = profiler[BIND_VALUE].GetStdev(CounterType::TIMER); - double minBindTime = profiler[BIND_VALUE].GetMin(CounterType::TIMER); - double maxBindTime = profiler[BIND_VALUE].GetMax(CounterType::TIMER); - - double averageFirstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::TIMER); - double stdevFirstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetStdev(CounterType::TIMER); - double minFirstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetMin(CounterType::TIMER); - double maxFirstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetMax(CounterType::TIMER); - - double averageEvalTime = profiler[EVAL_MODEL].GetAverage(CounterType::TIMER); - double stdevEvalTime = profiler[EVAL_MODEL].GetStdev(CounterType::TIMER); - double minEvalTime = profiler[EVAL_MODEL].GetMin(CounterType::TIMER); - double maxEvalTime = profiler[EVAL_MODEL].GetMax(CounterType::TIMER); - - double averageFirstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::TIMER); - double stdevFirstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetStdev(CounterType::TIMER); - double minFirstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetMin(CounterType::TIMER); - double maxFirstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetMax(CounterType::TIMER); - - double averageLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::WORKING_SET_USAGE); - double stdevLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetStdev(CounterType::WORKING_SET_USAGE); - double minLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetMin(CounterType::WORKING_SET_USAGE); - double 
maxLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetMax(CounterType::WORKING_SET_USAGE); - - double averageCreateSessionWorkingSetMemoryUsage = - profiler[CREATE_SESSION].GetAverage(CounterType::WORKING_SET_USAGE); - double stdevCreateSessionWorkingSetMemoryUsage = - profiler[CREATE_SESSION].GetStdev(CounterType::WORKING_SET_USAGE); - double minCreateSessionWorkingSetMemoryUsage = profiler[CREATE_SESSION].GetMin(CounterType::WORKING_SET_USAGE); - double maxCreateSessionWorkingSetMemoryUsage = profiler[CREATE_SESSION].GetMax(CounterType::WORKING_SET_USAGE); - - double averageBindWorkingSetMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::WORKING_SET_USAGE); - double stdevBindWorkingSetMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::WORKING_SET_USAGE); - double minBindWorkingSetMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::WORKING_SET_USAGE); - double maxBindWorkingSetMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::WORKING_SET_USAGE); - - double averageFirstBindWorkingSetMemoryUsage = - profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE); - double stdevFirstBindWorkingSetMemoryUsage = - profiler[BIND_VALUE_FIRST_RUN].GetStdev(CounterType::WORKING_SET_USAGE); - double minFirstBindWorkingSetMemoryUsage = - profiler[BIND_VALUE_FIRST_RUN].GetMin(CounterType::WORKING_SET_USAGE); - double maxFirstBindWorkingSetMemoryUsage = - profiler[BIND_VALUE_FIRST_RUN].GetMax(CounterType::WORKING_SET_USAGE); - - double averageEvalWorkingSetMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::WORKING_SET_USAGE); - double stdevEvalWorkingSetMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::WORKING_SET_USAGE); - double minEvalWorkingSetMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::WORKING_SET_USAGE); - double maxEvalWorkingSetMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::WORKING_SET_USAGE); - - double averageFirstEvalWorkingSetMemoryUsage = - 
profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE); - double stdevFirstEvalWorkingSetMemoryUsage = - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE); - double minFirstEvalWorkingSetMemoryUsage = - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE); - double maxFirstEvalWorkingSetMemoryUsage = - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE); - - double averageLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); - double stdevLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE); - double minLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE); - double maxLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE); - - double averageCreateSessionDedicatedMemoryUsage = - profiler[CREATE_SESSION].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); - double stdevCreateSessionDedicatedMemoryUsage = - profiler[CREATE_SESSION].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE); - double minCreateSessionDedicatedMemoryUsage = - profiler[CREATE_SESSION].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE); - double maxCreateSessionDedicatedMemoryUsage = - profiler[CREATE_SESSION].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE); - - double averageBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); - double stdevBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE); - double minBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE); - double maxBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE); - - double averageFirstBindDedicatedMemoryUsage = - profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); - double stdevFirstBindDedicatedMemoryUsage = - 
profiler[BIND_VALUE_FIRST_RUN].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE); - double minFirstBindDedicatedMemoryUsage = - profiler[BIND_VALUE_FIRST_RUN].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE); - double maxFirstBindDedicatedMemoryUsage = - profiler[BIND_VALUE_FIRST_RUN].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE); - - double averageEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); - double stdevEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE); - double minEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE); - double maxEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE); - - double averageFirstEvalDedicatedMemoryUsage = - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); - double stdevFirstEvalDedicatedMemoryUsage = - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); - double minFirstEvalDedicatedMemoryUsage = - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); - double maxFirstEvalDedicatedMemoryUsage = - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE); - - double averageLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); - double stdevLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetStdev(CounterType::GPU_SHARED_MEM_USAGE); - double minLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetMin(CounterType::GPU_SHARED_MEM_USAGE); - double maxLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetMax(CounterType::GPU_SHARED_MEM_USAGE); - - double averageCreateSessionSharedMemoryUsage = - profiler[CREATE_SESSION].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); - double stdevCreateSessionSharedMemoryUsage = - profiler[CREATE_SESSION].GetStdev(CounterType::GPU_SHARED_MEM_USAGE); - double minCreateSessionSharedMemoryUsage = 
profiler[CREATE_SESSION].GetMin(CounterType::GPU_SHARED_MEM_USAGE); - double maxCreateSessionSharedMemoryUsage = profiler[CREATE_SESSION].GetMax(CounterType::GPU_SHARED_MEM_USAGE); - - double averageBindSharedMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); - double stdevBindSharedMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::GPU_SHARED_MEM_USAGE); - double minBindSharedMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::GPU_SHARED_MEM_USAGE); - double maxBindSharedMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::GPU_SHARED_MEM_USAGE); - - double averageFirstBindSharedMemoryUsage = - profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); - double stdevFirstBindSharedMemoryUsage = - profiler[BIND_VALUE_FIRST_RUN].GetStdev(CounterType::GPU_SHARED_MEM_USAGE); - double minFirstBindSharedMemoryUsage = profiler[BIND_VALUE_FIRST_RUN].GetMin(CounterType::GPU_SHARED_MEM_USAGE); - double maxFirstBindSharedMemoryUsage = profiler[BIND_VALUE_FIRST_RUN].GetMax(CounterType::GPU_SHARED_MEM_USAGE); - - double averageEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); - double stdevEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::GPU_SHARED_MEM_USAGE); - double minEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::GPU_SHARED_MEM_USAGE); - double maxEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::GPU_SHARED_MEM_USAGE); - - double averageFirstEvalSharedMemoryUsage = - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); - double stdevFirstEvalSharedMemoryUsage = - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); - double minFirstEvalSharedMemoryUsage = - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); - double maxFirstEvalSharedMemoryUsage = - profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE); - - if 
(!m_csvFileName.empty()) - { - // Check if header exists - bool bNewFile = false; - std::ifstream fin; - fin.open(m_csvFileName); - std::filebuf* outbuf = fin.rdbuf(); - if (EOF == outbuf->sbumpc()) - { - bNewFile = true; - } - fin.close(); - - std::ofstream fout; - fout.open(m_csvFileName, std::ios_base::app); - - std::wstring_convert> converter; - std::string modelName = converter.to_bytes(model); - - if (bNewFile) - { - fout << "model name" - << "," - << "device type" - << "," - << "input binding" - << "," - << "input type" - << "," - << "device creation location" - << "," - << "iterations" - << "," - << "load iterations" - << "," - << "session creation iterations" - << "," - << "average load (ms)" - << "," - << "standard deviation load (ms)" - << "," - << "min load (ms)" - << "," - << "max load (ms)" - << "," - << "average session creation (ms)" - << "," - << "standard deviation session creation (ms)" - << "," - << "min session creation (ms)" - << "," - << "max session creation (ms)" - << "," - << "average first bind (ms)" - << "," - << "standard deviation first bind (ms)" - << "," - << "min first bind (ms)" - << "," - << "max first bind (ms)" - << "," - << "average bind (ms)" - << "," - << "standard deviation bind (ms)" - << "," - << "min bind (ms)" - << "," - << "max bind (ms)" - << "," - << "average first evaluate (ms)" - << "," - << "standard deviation first evaluate (ms)" - << "," - << "min first evaluate (ms)" - << "," - << "max first evaluate (ms)" - << "," - << "average evaluate (ms)" - << "," - << "standard deviation evaluate (ms)" - << "," - << "min evaluate (ms)" - << "," - << "max evaluate (ms)" - << "," - << "load average working set memory (MB)" - << "," - << "load standard deviation working set memory (MB)" - << "," - << "load min working set memory (MB)" - << "," - << "load max working set memory (MB)" - << "," - << "session creation average working set memory (MB)" - << "," - << "session creation standard deviation working set memory (MB)" - << 
"," - << "session creation min working set memory (MB)" - << "," - << "session creation max working set memory (MB)" - << "," - << "first bind average working set memory (MB)" - << "," - << "first bind standard deviation working set memory (MB)" - << "," - << "first bind min working set memory (MB)" - << "," - << "first bind max working set memory (MB)" - << "," - << "bind average working set memory (MB)" - << "," - << "bind standard deviation working set memory (MB)" - << "," - << "bind min working set memory (MB)" - << "," - << "bind max working set memory (MB)" - << "," - << "first evaluate average working set memory (MB)" - << "," - << "first evaluate standard deviation working set memory (MB)" - << "," - << "first evaluate min working set memory (MB)" - << "," - << "first evaluate max working set memory (MB)" - << "," - << "evaluate average working set memory (MB)" - << "," - << "evaluate standard deviation working set memory (MB)" - << "," - << "evaluate min working set memory (MB)" - << "," - << "evaluate max working set memory (MB)" - << "," - << "load average dedicated memory (MB)" - << "," - << "load standard deviation dedicated memory (MB)" - << "," - << "load min dedicated memory (MB)" - << "," - << "load max dedicated memory (MB)" - << "," - << "session creation average dedicated memory (MB)" - << "," - << "session creation standard deviation dedicated memory (MB)" - << "," - << "session creation min dedicated memory (MB)" - << "," - << "session creation max dedicated memory (MB)" - << "," - << "first bind average dedicated memory (MB)" - << "," - << "first bind standard deviation dedicated memory (MB)" - << "," - << "first bind min dedicated memory (MB)" - << "," - << "first bind max dedicated memory (MB)" - << "," - << "bind average dedicated memory (MB)" - << "," - << "bind standard deviation dedicated memory (MB)" - << "," - << "bind min dedicated memory (MB)" - << "," - << "bind max dedicated memory (MB)" - << "," - << "first evaluate average 
dedicated memory (MB)" - << "," - << "first evaluate standard deviation dedicated memory (MB)" - << "," - << "first evaluate min dedicated memory (MB)" - << "," - << "first evaluate max dedicated memory (MB)" - << "," - << "evaluate average dedicated memory (MB)" - << "," - << "evaluate standard deviation dedicated memory (MB)" - << "," - << "evaluate min dedicated memory (MB)" - << "," - << "evaluate max dedicated memory (MB)" - << "," - << "load average shared memory (MB)" - << "," - << "load standard deviation shared memory (MB)" - << "," - << "load min shared memory (MB)" - << "," - << "load max shared memory (MB)" - << "," - << "session creation average shared memory (MB)" - << "," - << "session creation standard deviation shared memory (MB)" - << "," - << "session creation min shared memory (MB)" - << "," - << "session creation max shared memory (MB)" - << "," - << "first bind average shared memory (MB)" - << "," - << "first bind standard deviation shared memory (MB)" - << "," - << "first bind min shared memory (MB)" - << "," - << "first bind max shared memory (MB)" - << "," - << "bind average shared memory (MB)" - << "," - << "bind standard deviation shared memory (MB)" - << "," - << "bind min shared memory (MB)" - << "," - << "bind max shared memory (MB)" - << "," - << "first evaluate average shared memory (MB)" - << "," - << "first evaluate standard deviation shared memory (MB)" - << "," - << "first evaluate min shared memory (MB)" - << "," - << "first evaluate max shared memory (MB)" - << "," - << "evaluate average shared memory (MB)" - << "," - << "evaluate standard deviation shared memory (MB)" - << "," - << "evaluate min shared memory (MB)" - << "," - << "evaluate max shared memory (MB)" - << ","; - for (auto metaDataPair : perfFileMetadata) - { - fout << metaDataPair.first << ","; - } - fout << std::endl; - } - fout << modelName << "," << deviceType << "," << inputBinding << "," << inputType << "," - << deviceCreationLocation << "," << numIterations 
<< "," << numberLoadIterations << "," << numberCreateSessionIterations << "," - << averageLoadTime << "," << stdevLoadTime << "," << minLoadTime << "," << maxLoadTime << "," - << averageCreateSessionTime << "," << stdevCreateSessionTime << "," << minCreateSessionTime << "," << maxCreateSessionTime << "," - << averageFirstBindTime << "," << stdevFirstBindTime << "," << minFirstBindTime << "," << maxFirstBindTime << "," - << (numIterations <= 1 ? 0 : averageBindTime) << "," << (numIterations <= 1 ? 0 : stdevBindTime) << "," - << (numIterations <= 1 ? 0 : minBindTime) << "," << (numIterations <= 1 ? 0 : maxBindTime) << "," - << averageFirstEvalTime << "," << stdevFirstEvalTime << "," << minFirstEvalTime << "," << maxFirstEvalTime<< "," - << (numIterations <= 1 ? 0 : averageEvalTime) << "," << (numIterations <= 1 ? 0 : stdevEvalTime) << "," - << (numIterations <= 1 ? 0 : minEvalTime) << "," << (numIterations <= 1 ? 0 : maxEvalTime) << "," - - << averageLoadWorkingSetMemoryUsage << "," << stdevLoadWorkingSetMemoryUsage << "," << minLoadWorkingSetMemoryUsage << "," << maxLoadWorkingSetMemoryUsage << "," - << averageCreateSessionWorkingSetMemoryUsage << "," << stdevCreateSessionWorkingSetMemoryUsage << "," << minCreateSessionWorkingSetMemoryUsage << "," << maxCreateSessionWorkingSetMemoryUsage << "," - << averageFirstBindWorkingSetMemoryUsage << "," << stdevFirstBindWorkingSetMemoryUsage << "," << minFirstBindWorkingSetMemoryUsage << "," << maxFirstBindWorkingSetMemoryUsage << "," - << (numIterations <= 1 ? 0 : averageBindWorkingSetMemoryUsage) << "," - << (numIterations <= 1 ? 0 : stdevBindWorkingSetMemoryUsage) << "," - << (numIterations <= 1 ? 0 : maxBindWorkingSetMemoryUsage) << "," - << (numIterations <= 1 ? 0 : minBindWorkingSetMemoryUsage) << "," - << averageFirstBindWorkingSetMemoryUsage << "," << stdevFirstBindWorkingSetMemoryUsage << "," << minFirstBindWorkingSetMemoryUsage << "," << maxFirstBindWorkingSetMemoryUsage << "," - << (numIterations <= 1 ? 
0 : averageEvalWorkingSetMemoryUsage) << "," - << (numIterations <= 1 ? 0 : stdevEvalWorkingSetMemoryUsage) << "," - << (numIterations <= 1 ? 0 : maxEvalWorkingSetMemoryUsage) << "," - << (numIterations <= 1 ? 0 : minEvalWorkingSetMemoryUsage) << "," - - << averageLoadDedicatedMemoryUsage << "," << stdevLoadDedicatedMemoryUsage << "," << minLoadDedicatedMemoryUsage << "," << maxLoadDedicatedMemoryUsage << "," - << averageCreateSessionDedicatedMemoryUsage << "," << stdevCreateSessionDedicatedMemoryUsage << "," << minCreateSessionDedicatedMemoryUsage << "," << maxCreateSessionDedicatedMemoryUsage << "," - << averageFirstBindDedicatedMemoryUsage << "," << stdevFirstBindDedicatedMemoryUsage << "," << minFirstBindDedicatedMemoryUsage << "," << maxFirstBindDedicatedMemoryUsage << "," - << (numIterations <= 1 ? 0 : averageBindDedicatedMemoryUsage) << "," - << (numIterations <= 1 ? 0 : stdevBindDedicatedMemoryUsage) << "," - << (numIterations <= 1 ? 0 : maxBindDedicatedMemoryUsage) << "," - << (numIterations <= 1 ? 0 : minBindDedicatedMemoryUsage) << "," - << averageFirstBindDedicatedMemoryUsage << "," << stdevFirstBindDedicatedMemoryUsage << "," << minFirstBindDedicatedMemoryUsage << "," << maxFirstBindDedicatedMemoryUsage << "," - << (numIterations <= 1 ? 0 : averageEvalDedicatedMemoryUsage) << "," - << (numIterations <= 1 ? 0 : stdevEvalDedicatedMemoryUsage) << "," - << (numIterations <= 1 ? 0 : maxEvalDedicatedMemoryUsage) << "," - << (numIterations <= 1 ? 
0 : minEvalDedicatedMemoryUsage) << "," - - << averageLoadSharedMemoryUsage << "," << stdevLoadSharedMemoryUsage << "," << minLoadSharedMemoryUsage << "," << maxLoadSharedMemoryUsage << "," - << averageCreateSessionSharedMemoryUsage << "," << stdevCreateSessionSharedMemoryUsage << "," << minCreateSessionSharedMemoryUsage << "," << maxCreateSessionSharedMemoryUsage << "," - << averageFirstBindSharedMemoryUsage << "," << stdevFirstBindSharedMemoryUsage << "," << minFirstBindSharedMemoryUsage << "," << maxFirstBindSharedMemoryUsage << "," - << (numIterations <= 1 ? 0 : averageBindSharedMemoryUsage) << "," - << (numIterations <= 1 ? 0 : stdevBindSharedMemoryUsage) << "," - << (numIterations <= 1 ? 0 : maxBindSharedMemoryUsage) << "," - << (numIterations <= 1 ? 0 : minBindSharedMemoryUsage) << "," - << averageFirstBindSharedMemoryUsage << "," << stdevFirstBindSharedMemoryUsage << "," << minFirstBindSharedMemoryUsage << "," << maxFirstBindSharedMemoryUsage << "," - << (numIterations <= 1 ? 0 : averageEvalSharedMemoryUsage) << "," - << (numIterations <= 1 ? 0 : stdevEvalSharedMemoryUsage) << "," - << (numIterations <= 1 ? 0 : maxEvalSharedMemoryUsage) << "," - << (numIterations <= 1 ? 
0 : minEvalSharedMemoryUsage) << ","; - for (auto metaDataPair : perfFileMetadata) - { - fout << metaDataPair.second << ","; - } - fout << std::endl; - fout.close(); - } - } - - std::vector m_clockLoadTimes; - std::vector m_clockBindTimes; - std::vector m_clockEvalTimes; - - std::wstring getCsvFileNamePerIterationResult() { return m_csvFileNamePerIterationResult; } -#if defined(_AMD64_) + const std::vector>& perfFileMetadata) const; + static void PrintLearningModelDevice(const LearningModelDeviceWithMetadata& device); + static std::wstring FeatureDescriptorToString(const ILearningModelFeatureDescriptor& descriptor); + static bool doesDescriptorContainFP16(const ILearningModelFeatureDescriptor& descriptor); + static bool doesModelContainFP16(const LearningModel& model); + template + static void ProcessTensorResult(const CommandLineArgs& args, const void* buffer, const uint32_t uCapacity, + std::vector>& maxValues, std::ofstream& fout, unsigned int k); // PIX markers only work on amd64 +#if defined(_AMD64_) com_ptr& GetGraphicsAnalysis() { return m_graphicsAnalysis; } #endif private: + std::vector m_clockLoadTimes; + std::vector m_clockBindTimes; + std::vector m_clockEvalTimes; std::wstring m_csvFileName; std::wstring m_csvFileNamePerIterationSummary; std::wstring m_csvFileNamePerIterationResult; @@ -1240,4 +88,4 @@ class OutputHelper // PIX markers only work on amd64 com_ptr m_graphicsAnalysis = nullptr; #endif -}; +}; \ No newline at end of file diff --git a/Tools/WinMLRunner/src/Run.cpp b/Tools/WinMLRunner/src/Run.cpp index 55c0a912..5dcb33b7 100644 --- a/Tools/WinMLRunner/src/Run.cpp +++ b/Tools/WinMLRunner/src/Run.cpp @@ -7,6 +7,7 @@ #include #include "Scenarios.h" #include + using namespace winrt::Windows::Graphics::DirectX::Direct3D11; using namespace winrt::Windows::Foundation::Metadata; std::vector GenerateInputFeatures(const LearningModel& model, const CommandLineArgs& args, diff --git a/Tools/WinMLRunner/src/Run.h b/Tools/WinMLRunner/src/Run.h index 
3796cd05..93c4aacc 100644 --- a/Tools/WinMLRunner/src/Run.h +++ b/Tools/WinMLRunner/src/Run.h @@ -1,6 +1,6 @@ #include "CommandLineArgs.h" #include "LearningModelDeviceHelper.h" - +#include "TimerHelper.h" int run(CommandLineArgs& args, Profiler& profiler, const std::vector& deviceList, diff --git a/Tools/WinMLRunner/src/TimerHelper.h b/Tools/WinMLRunner/src/TimerHelper.h index 7608ed63..2dff78f5 100644 --- a/Tools/WinMLRunner/src/TimerHelper.h +++ b/Tools/WinMLRunner/src/TimerHelper.h @@ -1,7 +1,5 @@ #pragma once -#include "Common.h" -#include #include #ifndef DISABLE_GPU_COUNTERS #include diff --git a/Tools/WinMLRunner/src/TypeHelper.h b/Tools/WinMLRunner/src/TypeHelper.h index db18a684..44ef5850 100644 --- a/Tools/WinMLRunner/src/TypeHelper.h +++ b/Tools/WinMLRunner/src/TypeHelper.h @@ -1,7 +1,11 @@ #pragma once #include "Common.h" +#ifdef USE_WINML_NUGET +using namespace winrt::Microsoft::AI::MachineLearning; +#else using namespace winrt::Windows::AI::MachineLearning; +#endif using namespace winrt::Windows::Graphics::DirectX; using namespace winrt::Windows::Graphics::Imaging;