microsoft · pranavsharma · Oct 25, 2024 · Oct 25, 2024 · Oct 29, 2024 · Oct 29, 2024
diff --git a/.pipelines/nuget_config/x64/packages.config b/.pipelines/nuget_config/x64/packages.config
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="utf-8"?>
 <packages>
   <package id="python" version="3.9.7" targetFramework="native" />
-  <package id="Microsoft.AI.DirectML" version="1.15.2" targetFramework="native" />
+  <package id="Microsoft.AI.DirectML" version="1.15.4" targetFramework="native" />
   <package id="Microsoft.Windows.CppWinRT" version="2.0.201201.7" targetFramework="native" />
 </packages>
diff --git a/.pipelines/nuget_config/x86/packages.config b/.pipelines/nuget_config/x86/packages.config
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="utf-8"?>
 <packages>
   <package id="pythonx86" version="3.9.7" targetFramework="native" />
-  <package id="Microsoft.AI.DirectML" version="1.15.2" targetFramework="native" />
+  <package id="Microsoft.AI.DirectML" version="1.15.4" targetFramework="native" />
   <package id="Microsoft.Windows.CppWinRT" version="2.0.201201.7" targetFramework="native" />
 </packages>
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -148,6 +148,7 @@ option(onnxruntime_TVM_USE_HASH "Build ipp-crypto library for support hash algor
 option(onnxruntime_USE_XNNPACK "Build with XNNPACK support. Provides an alternative math library on ARM, WebAssembly and x86." OFF)
 option(onnxruntime_USE_WEBNN "Build with WebNN support. Enable hardware acceleration in web browsers." OFF)
 option(onnxruntime_USE_WEBGPU "Build with WebGPU support. Enable WebGPU via C/C++ interface." OFF)
+option(onnxruntime_USE_EXTERNAL_DAWN "Build with treating Dawn as external dependency. Will not link Dawn at build time." OFF)
 
 # Options related to reducing the binary size produced by the build
 # XNNPACK EP requires the internal NHWC contrib ops to be available, so this option must be OFF when onnxruntime_USE_XNNPACK is ON
@@ -958,6 +959,9 @@ if (onnxruntime_USE_WEBGPU)
   list(APPEND ORT_PROVIDER_FLAGS -DUSE_WEBGPU=1)
   list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_WEBGPU=1)
   list(APPEND ONNXRUNTIME_PROVIDER_NAMES webgpu)
+  if (onnxruntime_USE_EXTERNAL_DAWN)
+    list(APPEND ORT_PROVIDER_FLAGS -DUSE_EXTERNAL_DAWN=1)
+  endif()
 endif()
 if (onnxruntime_USE_CANN)
     list(APPEND ORT_PROVIDER_FLAGS  -DUSE_CANN=1)

diff --git a/cmake/external/dml.cmake b/cmake/external/dml.cmake
@@ -41,7 +41,7 @@ if (NOT onnxruntime_USE_CUSTOM_DIRECTML)
   set(NUGET_CONFIG ${PROJECT_SOURCE_DIR}/../NuGet.config)
   set(PACKAGES_CONFIG ${PROJECT_SOURCE_DIR}/../packages.config)
   get_filename_component(PACKAGES_DIR ${CMAKE_CURRENT_BINARY_DIR}/../packages ABSOLUTE)
-  set(DML_PACKAGE_DIR ${PACKAGES_DIR}/Microsoft.AI.DirectML.1.15.2)
+  set(DML_PACKAGE_DIR ${PACKAGES_DIR}/Microsoft.AI.DirectML.1.15.4)
 
   # Restore nuget packages, which will pull down the DirectML redist package.
   add_custom_command(

diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake
@@ -656,11 +656,16 @@ if (onnxruntime_USE_WEBGPU)
 
     # Vulkan may optionally be included in a Windows build. Exclude until we have an explicit use case that requires it.
     set(DAWN_ENABLE_VULKAN OFF CACHE BOOL "" FORCE)
+    # We are currently always using the D3D12 backend.
+    set(DAWN_ENABLE_D3D11 OFF CACHE BOOL "" FORCE)
   endif()
 
   onnxruntime_fetchcontent_makeavailable(dawn)
 
-  list(APPEND onnxruntime_EXTERNAL_LIBRARIES dawn::dawn_native dawn::dawn_proc)
+  if (NOT onnxruntime_USE_EXTERNAL_DAWN)
+    list(APPEND onnxruntime_EXTERNAL_LIBRARIES dawn::dawn_native)
+  endif()
+  list(APPEND onnxruntime_EXTERNAL_LIBRARIES dawn::dawn_proc)
 endif()
 
 set(onnxruntime_LINK_DIRS)

diff --git a/cmake/onnxruntime_providers_webgpu.cmake b/cmake/onnxruntime_providers_webgpu.cmake
@@ -22,6 +22,9 @@
   onnxruntime_add_static_library(onnxruntime_providers_webgpu ${onnxruntime_providers_webgpu_cc_srcs})
   onnxruntime_add_include_to_target(onnxruntime_providers_webgpu
     onnxruntime_common dawn::dawncpp_headers dawn::dawn_headers onnx onnx_proto flatbuffers::flatbuffers Boost::mp11 safeint_interface)
-  target_link_libraries(onnxruntime_providers_webgpu dawn::dawn_native dawn::dawn_proc)
+  if (NOT onnxruntime_USE_EXTERNAL_DAWN)
+    target_link_libraries(onnxruntime_providers_webgpu dawn::dawn_native)
+  endif()
+  target_link_libraries(onnxruntime_providers_webgpu dawn::dawn_proc)
 
   set_target_properties(onnxruntime_providers_webgpu PROPERTIES FOLDER "ONNXRuntime")
diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
@@ -523,6 +523,9 @@ set (onnxruntime_global_thread_pools_test_SRC
           ${ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR}/test_main.cc
           ${ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR}/test_inference.cc)
 
+set (onnxruntime_webgpu_external_dawn_test_SRC
+          ${TEST_SRC_DIR}/webgpu/external_dawn/main.cc)
+
 # tests from lowest level library up.
 # the order of libraries should be maintained, with higher libraries being added first in the list
 
@@ -1884,4 +1887,13 @@ if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD
   endif()
 endif()
 
+if (onnxruntime_USE_WEBGPU AND onnxruntime_USE_EXTERNAL_DAWN)
+  AddTest(TARGET onnxruntime_webgpu_external_dawn_test
+          SOURCES ${onnxruntime_webgpu_external_dawn_test_SRC}
+          LIBS dawn::dawn_native ${onnxruntime_test_providers_libs}
+          DEPENDS ${all_dependencies}
+  )
+  onnxruntime_add_include_to_target(onnxruntime_webgpu_external_dawn_test dawn::dawncpp_headers dawn::dawn_headers)
+endif()
+
 include(onnxruntime_fuzz_test.cmake)
diff --git a/cmake/patches/dawn/dawn.patch b/cmake/patches/dawn/dawn.patch
@@ -15,40 +15,55 @@ index 9c0bd6fa4e..bf8a57aeac 100644
      ###############################################################################
      # Do the 'complete_lib' build.
 diff --git a/src/dawn/native/Surface_metal.mm b/src/dawn/native/Surface_metal.mm
-index ce55acbd43..baa4835362 100644
+index ce55acbd43..2cfd363479 100644
 --- a/src/dawn/native/Surface_metal.mm
 +++ b/src/dawn/native/Surface_metal.mm
-@@ -36,7 +36,13 @@
+@@ -33,10 +33,18 @@
+
+ #import <QuartzCore/CAMetalLayer.h>
+
++#include "dawn/common/Platform.h"
++
  namespace dawn::native {
 
  bool InheritsFromCAMetalLayer(void* obj) {
 -    id<NSObject> object = static_cast<id>(obj);
 +    id<NSObject> object =
-+#if TARGET_OS_IOS
++#if DAWN_PLATFORM_IS(IOS)
 +        (__bridge id)obj;
-+#else
++#else   // DAWN_PLATFORM_IS(IOS)
 +        static_cast<id>(obj);
-+#endif
++#endif  // DAWN_PLATFORM_IS(IOS)
 +
      return [object isKindOfClass:[CAMetalLayer class]];
  }
 
 diff --git a/src/dawn/native/metal/SharedFenceMTL.mm b/src/dawn/native/metal/SharedFenceMTL.mm
-index bde8bfea07..f2f6459e91 100644
+index bde8bfea07..8906185d6f 100644
 --- a/src/dawn/native/metal/SharedFenceMTL.mm
 +++ b/src/dawn/native/metal/SharedFenceMTL.mm
-@@ -40,7 +40,13 @@ ResultOrError<Ref<SharedFence>> SharedFence::Create(
+@@ -25,6 +25,8 @@
+ // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
++#include "dawn/common/Platform.h"
++
+ #include "dawn/native/metal/SharedFenceMTL.h"
+
+ #include "dawn/native/ChainUtils.h"
+@@ -39,8 +41,13 @@ ResultOrError<Ref<SharedFence>> SharedFence::Create(
+     const SharedFenceMTLSharedEventDescriptor* descriptor) {
      DAWN_INVALID_IF(descriptor->sharedEvent == nullptr, "MTLSharedEvent is missing.");
      if (@available(macOS 10.14, iOS 12.0, *)) {
-         return AcquireRef(new SharedFence(
+-        return AcquireRef(new SharedFence(
 -            device, label, static_cast<id<MTLSharedEvent>>(descriptor->sharedEvent)));
-+            device, label,
-+#if TARGET_OS_IOS
-+            (__bridge id<MTLSharedEvent>)(descriptor->sharedEvent)
-+#else
-+            static_cast<id<MTLSharedEvent>>(descriptor->sharedEvent)
-+#endif
-+            ));
++        return AcquireRef(new SharedFence(device, label,
++#if DAWN_PLATFORM_IS(IOS)
++                                          (__bridge id<MTLSharedEvent>)(descriptor->sharedEvent)
++#else   // DAWN_PLATFORM_IS(IOS)
++                                          static_cast<id<MTLSharedEvent>>(descriptor->sharedEvent)
++#endif  // DAWN_PLATFORM_IS(IOS)
++                                              ));
      } else {
          return DAWN_INTERNAL_ERROR("MTLSharedEvent not supported.");
      }

diff --git a/include/onnxruntime/core/framework/ortdevice.h b/include/onnxruntime/core/framework/ortdevice.h
@@ -17,6 +17,7 @@ struct OrtDevice {
   static const DeviceType GPU = 1;  // Nvidia or AMD
   static const DeviceType FPGA = 2;
   static const DeviceType NPU = 3;  // Ascend
+  static const DeviceType DML = 4;
 
   struct MemType {
     // Pre-defined memory types.

diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -650,6 +650,9 @@
  *  constructors to construct an instance of a Status object from exceptions.
  */
 struct Status : detail::Base<OrtStatus> {
+  using Base = detail::Base<OrtStatus>;
+  using Base::Base;
+
   explicit Status(std::nullptr_t) noexcept {}               ///< Create an empty object, must be assigned a valid one to be used
   explicit Status(OrtStatus* status) noexcept;              ///< Takes ownership of OrtStatus instance returned from the C API.
   explicit Status(const Exception&) noexcept;               ///< Creates status instance out of exception
@@ -728,6 +731,9 @@
  *
  */
 struct CustomOpDomain : detail::Base<OrtCustomOpDomain> {
+  using Base = detail::Base<OrtCustomOpDomain>;
+  using Base::Base;
+
   explicit CustomOpDomain(std::nullptr_t) {}  ///< Create an empty CustomOpDomain object, must be assigned a valid one to be used
 
   /// \brief Wraps OrtApi::CreateCustomOpDomain
@@ -963,8 +969,10 @@
  *
  */
 struct ModelMetadata : detail::Base<OrtModelMetadata> {
-  explicit ModelMetadata(std::nullptr_t) {}                                   ///< Create an empty ModelMetadata object, must be assigned a valid one to be used
-  explicit ModelMetadata(OrtModelMetadata* p) : Base<OrtModelMetadata>{p} {}  ///< Used for interop with the C API
+  using Base = detail::Base<OrtModelMetadata>;
+  using Base::Base;
+
+  explicit ModelMetadata(std::nullptr_t) {}  ///< Create an empty ModelMetadata object, must be assigned a valid one to be used
 
   /** \brief Returns a copy of the producer name.
    *
@@ -1237,6 +1245,9 @@
  *
  */
 struct TensorTypeAndShapeInfo : detail::TensorTypeAndShapeInfoImpl<OrtTensorTypeAndShapeInfo> {
+  using Base = detail::TensorTypeAndShapeInfoImpl<OrtTensorTypeAndShapeInfo>;
+  using Base::Base;
+
   explicit TensorTypeAndShapeInfo(std::nullptr_t) {}                                                ///< Create an empty TensorTypeAndShapeInfo object, must be assigned a valid one to be used
   explicit TensorTypeAndShapeInfo(OrtTensorTypeAndShapeInfo* p) : TensorTypeAndShapeInfoImpl{p} {}  ///< Used for interop with the C API
   ConstTensorTypeAndShapeInfo GetConst() const { return ConstTensorTypeAndShapeInfo{this->p_}; }
@@ -1258,6 +1269,9 @@
  *
  */
 struct SequenceTypeInfo : detail::SequenceTypeInfoImpl<OrtSequenceTypeInfo> {
+  using Base = detail::SequenceTypeInfoImpl<OrtSequenceTypeInfo>;
+  using Base::Base;
+
   explicit SequenceTypeInfo(std::nullptr_t) {}                                                         ///< Create an empty SequenceTypeInfo object, must be assigned a valid one to be used
   explicit SequenceTypeInfo(OrtSequenceTypeInfo* p) : SequenceTypeInfoImpl<OrtSequenceTypeInfo>{p} {}  ///< Used for interop with the C API
   ConstSequenceTypeInfo GetConst() const { return ConstSequenceTypeInfo{this->p_}; }
@@ -1293,6 +1307,9 @@
  *
  */
 struct MapTypeInfo : detail::MapTypeInfoImpl<OrtMapTypeInfo> {
+  using Base = detail::MapTypeInfoImpl<OrtMapTypeInfo>;
+  using Base::Base;
+
   explicit MapTypeInfo(std::nullptr_t) {}                                          ///< Create an empty MapTypeInfo object, must be assigned a valid one to be used
   explicit MapTypeInfo(OrtMapTypeInfo* p) : MapTypeInfoImpl<OrtMapTypeInfo>{p} {}  ///< Used for interop with the C API
   ConstMapTypeInfo GetConst() const { return ConstMapTypeInfo{this->p_}; }
@@ -1324,6 +1341,9 @@
 /// the information about contained sequence or map depending on the ONNXType.
 /// </summary>
 struct TypeInfo : detail::TypeInfoImpl<OrtTypeInfo> {
+  using Base = detail::TypeInfoImpl<OrtTypeInfo>;
+  using Base::Base;
+
   explicit TypeInfo(std::nullptr_t) {}                                 ///< Create an empty TypeInfo object, must be assigned a valid one to be used
   explicit TypeInfo(OrtTypeInfo* p) : TypeInfoImpl<OrtTypeInfo>{p} {}  ///< C API Interop
 
@@ -1661,11 +1681,11 @@
  */
 struct Value : detail::ValueImpl<OrtValue> {
   using Base = detail::ValueImpl<OrtValue>;
+  using Base::Base;
   using OrtSparseValuesParam = detail::OrtSparseValuesParam;
   using Shape = detail::Shape;
 
-  explicit Value(std::nullptr_t) {}         ///< Create an empty Value object, must be assigned a valid one to be used
-  explicit Value(OrtValue* p) : Base{p} {}  ///< Used for interop with the C API
+  explicit Value(std::nullptr_t) {}  ///< Create an empty Value object, must be assigned a valid one to be used
   Value(Value&&) = default;
   Value& operator=(Value&&) = default;
 
@@ -1941,6 +1961,10 @@
 /// This struct provides life time management for custom op attribute
 /// </summary>
 struct OpAttr : detail::Base<OrtOpAttr> {
+  using Base = detail::Base<OrtOpAttr>;
+  using Base::Base;
+
+  explicit OpAttr(std::nullptr_t) {}
   OpAttr(const char* name, const void* data, int len, OrtOpAttrType type);
 };
 
@@ -2183,6 +2207,8 @@
 /// so it does not destroy the pointer the kernel does not own.
 /// </summary>
 struct KernelInfo : detail::KernelInfoImpl<OrtKernelInfo> {
+  using Base = detail::KernelInfoImpl<OrtKernelInfo>;
+  using Base::Base;
   explicit KernelInfo(std::nullptr_t) {}     ///< Create an empty instance to initialize later
   explicit KernelInfo(OrtKernelInfo* info);  ///< Take ownership of the instance
   ConstKernelInfo GetConst() const { return ConstKernelInfo{this->p_}; }
@@ -2192,6 +2218,9 @@
 /// Create and own custom defined operation.
 /// </summary>
 struct Op : detail::Base<OrtOp> {
+  using Base = detail::Base<OrtOp>;
+  using Base::Base;
+
   explicit Op(std::nullptr_t) {}  ///< Create an empty Operator object, must be assigned a valid one to be used
 
   explicit Op(OrtOp*);  ///< Take ownership of the OrtOp

diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -51,7 +51,7 @@ inline void ThrowOnError(const Status& st) {
   }
 }
 
-inline Status::Status(OrtStatus* status) noexcept : Base<OrtStatus>{status} {
+inline Status::Status(OrtStatus* status) noexcept : detail::Base<OrtStatus>{status} {
 }
 
 inline Status::Status(const std::exception& e) noexcept {
@@ -1908,7 +1908,7 @@ inline void attr_utils::GetAttrs(const OrtKernelInfo* p, const char* name, std::
 
 inline KernelInfo::KernelInfo(OrtKernelInfo* info) : detail::KernelInfoImpl<OrtKernelInfo>{info} {}
 
-inline Op::Op(OrtOp* p) : Base<OrtOp>(p) {}
+inline Op::Op(OrtOp* p) : detail::Base<OrtOp>(p) {}
 
 inline Op Op::Create(const OrtKernelInfo* info, const char* op_name, const char* domain, int version,
                      const char** type_constraint_names,

diff --git a/js/scripts/prepare-onnx-node-tests.ts b/js/scripts/prepare-onnx-node-tests.ts
@@ -10,6 +10,8 @@ import * as path from 'path';
 import { downloadZip, extractFile } from './utils';
 
 const TEST_DATA_OPSET_VERSIONS = [
+  ['opset21', '1.16.2'],
+  ['opset20', '1.15.0'],
   ['opset19', '1.14.0'],
   ['opset18', '1.13.1'],
   ['opset17', '1.12.1'],

diff --git a/js/web/docs/webgpu-operators.md b/js/web/docs/webgpu-operators.md
@@ -21,11 +21,11 @@ Do not modify directly.*
 | Atan | ai.onnx(7+) |  |
 | Atanh | ai.onnx(9+) |  |
 | Attention | com.microsoft(1+) | need implementing mask and past/present |
-| AveragePool | ai.onnx(7-9,10,11+); com.ms.internal.nhwc(7-9,10,11+) | need perf optimization; need implementing activation |
+| AveragePool | ai.onnx(7-9,10,11-18,19+); com.ms.internal.nhwc(7-9,10,11-18,19+) | need perf optimization; need implementing activation |
 | BatchNormalization | ai.onnx(7-8,9-13,14,15+); com.ms.internal.nhwc(7-8,9-13,14,15+) |  |
 | BiasAdd | com.microsoft(1+) |  |
 | BiasSplitGelu | com.microsoft(1+) |  |
-| Cast | ai.onnx(6-8,9-12,13-18,19+) |  |
+| Cast | ai.onnx(6-8,9-12,13-18,19-20,21+) |  |
 | Ceil | ai.onnx(6-12,13+) |  |
 | Clip | ai.onnx(6-10,11,12,13+) |  |
 | Concat | ai.onnx(1-3,4-10,11-12,13+) |  |
@@ -44,7 +44,7 @@ Do not modify directly.*
 | Exp | ai.onnx(6-12,13+) |  |
 | Expand | ai.onnx(8-12,13+) |  |
 | FastGelu | com.microsoft(1+) |  |
-| Flatten | ai.onnx(1-8,9-10,11-12,13+) |  |
+| Flatten | ai.onnx(1-8,9-10,11-12,13-20,21+) |  |
 | Floor | ai.onnx(6-12,13+) |  |
 | FusedConv | com.microsoft(1+) |  |
 | Gather | ai.onnx(1-10,11-12,13+) |  |
@@ -58,7 +58,7 @@ Do not modify directly.*
 | GreaterOrEqual | ai.onnx(12-15,16+) |  |
 | GroupQueryAttention | com.microsoft(1+) |  |
 | HardSigmoid | ai.onnx(6+) |  |
-| If | ai.onnx(1-10,11-12,13-18,19+) |  |
+| If | ai.onnx(1-10,11-12,13-18,19-20,21+) |  |
 | InstanceNormalization | ai.onnx(6+); com.ms.internal.nhwc(6+) |  |
 | LayerNormalization | ai.onnx(1-16,17+) |  |
 | LeakyRelu | ai.onnx(6-15,16+) |  |
@@ -74,7 +74,7 @@ Do not modify directly.*
 | MultiHeadAttention | com.microsoft(1+) | need implementing mask and past/present |
 | Neg | ai.onnx(6-12,13+) |  |
 | Not | ai.onnx(1+) |  |
-| Pad | ai.onnx(2-10,11-12,13-17,18,19+) |  |
+| Pad | ai.onnx(2-10,11-12,13-17,18,19-20,21+) |  |
 | Pow | ai.onnx(7-11,12,13-14,15+) |  |
 | QuickGelu | com.microsoft(1+) |  |
 | Range | ai.onnx(11+) |  |
@@ -83,9 +83,9 @@ Do not modify directly.*
 | ReduceL2 | ai.onnx(1-10,11-12,13-17,18+) |  |
 | ReduceLogSum | ai.onnx(1-10,11-12,13-17,18+) |  |
 | ReduceLogSumExp | ai.onnx(1-10,11-12,13-17,18+) |  |
-| ReduceMax | ai.onnx(1-10,11,12,13-17,18+) |  |
+| ReduceMax | ai.onnx(1-10,11,12,13-17,18-19,20+) |  |
 | ReduceMean | ai.onnx(1-10,11-12,13-17,18+) |  |
-| ReduceMin | ai.onnx(1-10,11,12,13-17,18+) |  |
+| ReduceMin | ai.onnx(1-10,11,12,13-17,18-19,20+) |  |
 | ReduceProd | ai.onnx(1-10,11-12,13-17,18+) |  |
 | ReduceSum | ai.onnx(1-10,11-12,13+) |  |
 | ReduceSumSquare | ai.onnx(1-10,11-12,13-17,18+) |  |
@@ -104,12 +104,12 @@ Do not modify directly.*
 | Softmax | ai.onnx(1-10,11-12,13+) |  |
 | Split | ai.onnx(1,2-10,11-12,13-17,18+) |  |
 | Sqrt | ai.onnx(6-12,13+) |  |
-| Squeeze | ai.onnx(1-10,11-12,13+) |  |
+| Squeeze | ai.onnx(1-10,11-12,13-20,21+) |  |
 | Sub | ai.onnx(7-12,13,14+) |  |
 | Tan | ai.onnx(7+) |  |
 | Tanh | ai.onnx(6-12,13+) |  |
 | ThresholdedRelu | ai.onnx(10+) |  |
 | Tile | ai.onnx(6-12,13+) |  |
-| Transpose | ai.onnx(1-12,13+) | need perf optimization |
-| Unsqueeze | ai.onnx(1-10,11-12,13+) |  |
+| Transpose | ai.onnx(1-12,13-20,21+) | need perf optimization |
+| Unsqueeze | ai.onnx(1-10,11-12,13-20,21+) |  |
 | Where | ai.onnx(9-15,16+) |  |