diff --git a/frame/3/bli_l3_sup_int.c b/frame/3/bli_l3_sup_int.c index e54e01d7c7..e1deb32907 100644 --- a/frame/3/bli_l3_sup_int.c +++ b/frame/3/bli_l3_sup_int.c @@ -181,6 +181,13 @@ err_t bli_gemmsup_int if ( mu >= nu ) use_bp = TRUE; else /* if ( mu < nu ) */ use_bp = FALSE; + // In zgemm, mkernel outperforms nkernel for both m > n and m < n. + // mkernel is forced for zgemm. + if(bli_is_dcomplex(dt)) + { + use_bp = TRUE;//mkernel + } + // If the parallel thread factorization was automatic, we update it // with a new factorization based on the matrix dimensions in units // of micropanels.