diff --git a/crates/cubecl-linalg/src/matmul/kernels/matmul/algorithm/cmma.rs b/crates/cubecl-linalg/src/matmul/kernels/matmul/algorithm/cmma.rs index 75dbdb93..75ab9665 100644 --- a/crates/cubecl-linalg/src/matmul/kernels/matmul/algorithm/cmma.rs +++ b/crates/cubecl-linalg/src/matmul/kernels/matmul/algorithm/cmma.rs @@ -38,7 +38,7 @@ impl base::Algorithm for Cmma { Self::EG, Self::ES, Self::GlobalMatmul, - batch::TransposedDispatch, + batch::SwizzleTransposedDispatch<2>, >; fn cube_count(problem: &MatmulProblem) -> CubeCount { diff --git a/crates/cubecl-linalg/src/matmul/kernels/matmul/base.rs b/crates/cubecl-linalg/src/matmul/kernels/matmul/base.rs index faae6a14..01d60d2b 100644 --- a/crates/cubecl-linalg/src/matmul/kernels/matmul/base.rs +++ b/crates/cubecl-linalg/src/matmul/kernels/matmul/base.rs @@ -153,8 +153,8 @@ pub(crate) fn matmul_cube_preparation> let cube_count = D::cube_count(&problem); let advanced_config = AdvancedConfig { - lhs_tiling_order: matmul::components::stage::TilingOrderConfig::RowMajor, - rhs_tiling_order: matmul::components::stage::TilingOrderConfig::ColMajor, + lhs_tiling_order: matmul::components::stage::TilingOrderConfig::ColMajor, + rhs_tiling_order: matmul::components::stage::TilingOrderConfig::RowMajor, enforced_tile_layout: (None, None), };