From 2f31560430c3c28a1eed9588d69165b0a0179954 Mon Sep 17 00:00:00 2001 From: Jeff Bloomfield <38966965+jeffbloo@users.noreply.github.com> Date: Fri, 29 Mar 2024 14:37:30 -0700 Subject: [PATCH] Enable generic feature level devices in DML EP (#20114) ### Description Enable NPUs supporting DXCORE_ADAPTER_ATTRIBUTE_D3D12_GENERIC_ML and D3D_FEATURE_LEVEL_1_0_GENERIC with DML EP. This also begins ingesting DX headers through the DirectX-Headers repo. Note that this includes an update to cgamanifest.json for onnx-tensorrt which is triggered during re-generation due to a prior changes to deps.txt. ### Motivation and Context --- cgmanifests/generated/cgmanifest.json | 12 +++++- cmake/deps.txt | 1 + cmake/external/dml.cmake | 11 +++++ .../DmlExecutionProvider/src/AllocationInfo.h | 2 +- .../src/DmlExternalBufferAllocator.h | 2 +- .../src/ExecutionProvider.cpp | 15 +++---- .../src/IExecutionProvider.h | 2 +- .../src/Operators/DmlDFT.h | 2 +- .../src/Operators/DmlGridSample.h | 2 +- .../dml/DmlExecutionProvider/src/precomp.h | 2 +- .../providers/dml/dml_provider_factory.cc | 40 ++++++++++++++----- .../dml/dml_provider_factory_creator.h | 4 +- .../templates/download-deps.yml | 4 +- 13 files changed, 70 insertions(+), 29 deletions(-) diff --git a/cgmanifests/generated/cgmanifest.json b/cgmanifests/generated/cgmanifest.json index 3e13a567b1eaa..5a955324414e1 100644 --- a/cgmanifests/generated/cgmanifest.json +++ b/cgmanifests/generated/cgmanifest.json @@ -216,7 +216,7 @@ "component": { "type": "git", "git": { - "commitHash": "a43ce67187bab219520fd80f21af8bbd4354bc8c", + "commitHash": "bacfaaa951653cd4e72efe727a543567cb38f7de", "repositoryUrl": "https://github.com/onnx/onnx-tensorrt.git" }, "comments": "onnx_tensorrt" @@ -341,6 +341,16 @@ }, "comments": "composable_kernel" } + }, + { + "component": { + "type": "git", + "git": { + "commitHash": "de28d93dfa9ebf3e473127c1c657e1920a5345ee", + "repositoryUrl": "https://github.com/microsoft/DirectX-Headers.git" + }, + "comments": "directx_headers" + } } ] } diff --git a/cmake/deps.txt b/cmake/deps.txt index 22ad9338ea59a..720dbe107c9f1 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -57,3 +57,4 @@ cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.1.0.zip;757f90a79 utf8_range;https://github.com/protocolbuffers/utf8_range/archive/72c943dea2b9240cd09efde15191e144bc7c7d38.zip;9925739c9debc0efa2adcb194d371a35b6a03156 extensions;https://github.com/microsoft/onnxruntime-extensions/archive/94142d8391c9791ec71c38336436319a2d4ac7a0.zip;4365ac5140338b4cb75a39944a4be276e3829b3c composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/5356c4a943a35e74d7cdc69486afcb8703b9a59a.zip;522382c2af437e09124287e5879ab64af5b2e299 +directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e \ No newline at end of file diff --git a/cmake/external/dml.cmake b/cmake/external/dml.cmake index ae7e6d3801a64..8f18059ffdfe5 100644 --- a/cmake/external/dml.cmake +++ b/cmake/external/dml.cmake @@ -99,3 +99,14 @@ else() set(DML_PACKAGE_DIR ${dml_INCLUDE_DIR}/..) endif() endif() + +FetchContent_Declare( + directx_headers + URL ${DEP_URL_directx_headers} + URL_HASH SHA1=${DEP_SHA1_directx_headers} +) + +FetchContent_Populate(directx_headers) +set(directx_headers_INCLUDE_DIRS "${directx_headers_SOURCE_DIR}/include") + +include_directories(BEFORE ${directx_headers_INCLUDE_DIRS}) diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/AllocationInfo.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/AllocationInfo.h index 59a827a4ffa1b..9c395e9cc906b 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/AllocationInfo.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/AllocationInfo.h @@ -5,7 +5,7 @@ #include #include -#include +#include "directx/d3d12.h" #include "DmlResourceWrapper.h" namespace Dml diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlExternalBufferAllocator.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlExternalBufferAllocator.h index 22fd3be42c416..b22f0b2853e5d 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlExternalBufferAllocator.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlExternalBufferAllocator.h @@ -4,7 +4,7 @@ #pragma once #include -#include +#include "directx/d3d12.h" #include #include #include "External/D3DX12/d3dx12.h" diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp index 6c347ebdca7c1..d24bf3350b292 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp @@ -142,13 +142,7 @@ namespace Dml } } -// ORT release pipelines agent pools do not have 19H1 SDK installed which defines D3D_FEATURE_LEVEL_1_0_CORE. -// Once ORT/WinML github project can be built with VS2019, we can update these pools to use install the 19H1 SDK -// using the command line installer tool with VS2019 -// Task 24384515: Update ORT AIInfra release agent pool to install 19H1 SDK on VM bootstrap -#define D3D_FEATURE_LEVEL_1_0_CORE_PRIVATE ((D3D_FEATURE_LEVEL)0x1000) - - ExecutionProviderImpl::ExecutionProviderImpl(IDMLDevice* dmlDevice, ID3D12Device* d3d12Device, ID3D12CommandQueue* queue, bool enableMetacommands, bool enableDynamicGraphFusion) +ExecutionProviderImpl::ExecutionProviderImpl(IDMLDevice* dmlDevice, ID3D12Device* d3d12Device, ID3D12CommandQueue* queue, bool enableMetacommands, bool enableDynamicGraphFusion) : m_d3d12Device(d3d12Device), m_dmlDevice(dmlDevice), m_areMetacommandsEnabled(enableMetacommands), @@ -157,7 +151,10 @@ namespace Dml D3D12_FEATURE_DATA_FEATURE_LEVELS featureLevels = {}; D3D_FEATURE_LEVEL featureLevelsList[] = { - D3D_FEATURE_LEVEL_1_0_CORE_PRIVATE, + #ifndef _GAMING_XBOX + D3D_FEATURE_LEVEL_1_0_GENERIC, + #endif + D3D_FEATURE_LEVEL_1_0_CORE, D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_12_0, @@ -181,7 +178,7 @@ namespace Dml m_native16BitShaderOpsSupported = featureOptions.Native16BitShaderOpsSupported; } - m_isMcdmDevice = (featureLevels.MaxSupportedFeatureLevel == D3D_FEATURE_LEVEL_1_0_CORE_PRIVATE); + m_isMcdmDevice = (featureLevels.MaxSupportedFeatureLevel <= D3D_FEATURE_LEVEL_1_0_CORE); m_areCustomHeapsSupported = !m_isMcdmDevice; if (m_isMcdmDevice) diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/IExecutionProvider.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/IExecutionProvider.h index 17fd7c18ba4a1..f4c3f326274ad 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/IExecutionProvider.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/IExecutionProvider.h @@ -3,7 +3,7 @@ #pragma once -#include +#include "directx/d3d12.h" #include "core/providers/dml/DmlExecutionProvider/inc/DmlExecutionProvider.h" diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlDFT.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlDFT.h index c285cf1a070b9..ddd6d56128461 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlDFT.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlDFT.h @@ -4,7 +4,7 @@ #include "../../../OperatorAuthorHelper/OperatorHelper.h" #include "../External/D3DX12/d3dx12.h" -#include +#include "directx/d3d12.h" // NOTE: When this operator's implementation is moved into DML, the associated FP16 fallback // should be removed from IsCustomOpShader(...) in diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlGridSample.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlGridSample.h index 4bbc8a4b718da..4f5da9dd05491 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlGridSample.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlGridSample.h @@ -4,7 +4,7 @@ #include "../MLOperatorAuthorImpl.h" #include "../External/D3DX12/d3dx12.h" -#include +#include "directx/d3d12.h" // NOTE: When this operator's implementation is moved into DML, the associated FP16 fallback // should be removed from IsCustomOpShader(...) in diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/precomp.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/precomp.h index 1a796b25c5d1f..5bdc68b685ee4 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/precomp.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/precomp.h @@ -35,7 +35,7 @@ #include #include #else // Desktop -#include +#include "directx/d3d12.h" #include #include "External/D3DX12/d3dx12.h" #endif diff --git a/onnxruntime/core/providers/dml/dml_provider_factory.cc b/onnxruntime/core/providers/dml/dml_provider_factory.cc index b2688094a6d78..9ba1c35efb27b 100644 --- a/onnxruntime/core/providers/dml/dml_provider_factory.cc +++ b/onnxruntime/core/providers/dml/dml_provider_factory.cc @@ -1,9 +1,15 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include #include +#define INITGUID +#include +#include +#undef INITGUID + +#include "directx/d3d12.h" + #include #ifndef _GAMING_XBOX #include @@ -157,12 +163,15 @@ static ComPtr EnumerateDXCoreAdapters(IDXCoreAdapterFactory* // When DXCore APIs are available QI for relevant enumeration interfaces constexpr bool use_dxcore_workload_enumeration = false; if (!use_dxcore_workload_enumeration) { - // Get a list of all the adapters that support compute - GUID attributes[]{ DXCORE_ADAPTER_ATTRIBUTE_D3D12_CORE_COMPUTE }; ORT_THROW_IF_FAILED( - adapter_factory->CreateAdapterList(_countof(attributes), - attributes, + adapter_factory->CreateAdapterList(1, + &DXCORE_ADAPTER_ATTRIBUTE_D3D12_GENERIC_ML, adapter_list.GetAddressOf())); + + if (adapter_list->GetAdapterCount() == 0) + { + ORT_THROW_IF_FAILED(adapter_factory->CreateAdapterList(1, &DXCORE_ADAPTER_ATTRIBUTE_D3D12_CORE_COMPUTE, adapter_list.GetAddressOf())); + } } return adapter_list; @@ -477,6 +486,9 @@ static D3D12_COMMAND_LIST_TYPE CalculateCommandListType(ID3D12Device* d3d12_devi D3D12_FEATURE_DATA_FEATURE_LEVELS feature_levels = {}; D3D_FEATURE_LEVEL feature_levels_list[] = { + #ifndef _GAMING_XBOX + D3D_FEATURE_LEVEL_1_0_GENERIC, + #endif D3D_FEATURE_LEVEL_1_0_CORE, D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_11_1, @@ -492,8 +504,9 @@ static D3D12_COMMAND_LIST_TYPE CalculateCommandListType(ID3D12Device* d3d12_devi sizeof(feature_levels) )); - auto is_feature_level_1_0_core = (feature_levels.MaxSupportedFeatureLevel == D3D_FEATURE_LEVEL_1_0_CORE); - if (is_feature_level_1_0_core) { + auto use_compute_command_list = (feature_levels.MaxSupportedFeatureLevel <= D3D_FEATURE_LEVEL_1_0_CORE); + if (use_compute_command_list) + { return D3D12_COMMAND_LIST_TYPE_COMPUTE; } @@ -533,12 +546,21 @@ std::shared_ptr DMLProviderFactoryCreator::CreateFrom auto feature_level = D3D_FEATURE_LEVEL_11_0; if (IsNPU(adapter.Get())) { - feature_level = D3D_FEATURE_LEVEL_1_0_CORE; + feature_level = D3D_FEATURE_LEVEL_1_0_GENERIC; } // Create D3D12 Device from DXCore Adapter ComPtr d3d12_device; - ORT_THROW_IF_FAILED(D3D12CreateDevice(adapter.Get(), feature_level, IID_GRAPHICS_PPV_ARGS(d3d12_device.ReleaseAndGetAddressOf()))); + if (feature_level == D3D_FEATURE_LEVEL_1_0_GENERIC) { + // Attempt to create a D3D_FEATURE_LEVEL_1_0_CORE device first, in case the device supports this + // feature level and the D3D runtime does not support D3D_FEATURE_LEVEL_1_0_GENERIC + HRESULT hrUnused = D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_1_0_CORE, IID_GRAPHICS_PPV_ARGS(d3d12_device.ReleaseAndGetAddressOf())); + } + + if (!d3d12_device) { + ORT_THROW_IF_FAILED(D3D12CreateDevice(adapter.Get(), feature_level, IID_GRAPHICS_PPV_ARGS(d3d12_device.ReleaseAndGetAddressOf()))); + } + return CreateDMLDeviceAndProviderFactory(d3d12_device.Get(), disable_metacommands, enable_dynamic_graph_fusion); } diff --git a/onnxruntime/core/providers/dml/dml_provider_factory_creator.h b/onnxruntime/core/providers/dml/dml_provider_factory_creator.h index 0fab9fe902526..61d0cba0e1f98 100644 --- a/onnxruntime/core/providers/dml/dml_provider_factory_creator.h +++ b/onnxruntime/core/providers/dml/dml_provider_factory_creator.h @@ -6,12 +6,12 @@ #include #include -#include +#include "directx/d3d12.h" #include "core/framework/provider_options.h" #include "core/providers/providers.h" #include "core/providers/dml/dml_provider_factory.h" -#include +#include #include namespace onnxruntime { diff --git a/tools/ci_build/github/azure-pipelines/templates/download-deps.yml b/tools/ci_build/github/azure-pipelines/templates/download-deps.yml index 4fd33b4f0bc09..eb87a57024ed4 100644 --- a/tools/ci_build/github/azure-pipelines/templates/download-deps.yml +++ b/tools/ci_build/github/azure-pipelines/templates/download-deps.yml @@ -11,7 +11,7 @@ steps: packageType: upack feed: '/7424c8e4-5c62-490e-95c4-79446f31017c' definition: '517c4f6f-5437-4392-a70d-4f15ec5be2f0' - version: 1.0.145 + version: 1.0.149 downloadPath: $(Build.BinariesDirectory)/deps # The private ADO project @@ -22,7 +22,7 @@ steps: packageType: upack feed: '/4c7631f5-24c0-4307-8822-1aa8f180c325' definition: 'fd9dd5ad-b73e-4678-890e-edcf680dbc1a' - version: 1.0.145 + version: 1.0.149 downloadPath: $(Build.BinariesDirectory)/deps # You can add more ADO accounts at here.