diff --git a/crates/cubecl-cpp/src/cuda/wmma/cuda_compiler.rs b/crates/cubecl-cpp/src/cuda/wmma/cuda_compiler.rs index 17b5fdf7..dc725b23 100644 --- a/crates/cubecl-cpp/src/cuda/wmma/cuda_compiler.rs +++ b/crates/cubecl-cpp/src/cuda/wmma/cuda_compiler.rs @@ -91,7 +91,7 @@ impl WmmaCompiler> for CudaWmmaCompiler { gpu::Elem::Float(gpu::FloatKind::TF32), gpu::Elem::Float(gpu::FloatKind::TF32), gpu::Elem::Float(gpu::FloatKind::F32), - vec![(16, 8, 16)], + vec![(16, 16, 8)], )); } result