From a51cfd780c931a7fec88b644c12b3bad29e08a95 Mon Sep 17 00:00:00 2001 From: Madan mohan Manokar Date: Tue, 15 Dec 2020 12:55:53 +0530 Subject: [PATCH] sup zgemm improvement 1. In zgemm, mkernel outperforms nkernel for both m > n, and n > m. 2. Irrespective of mu and nu sizes, mkernel is forced for zgemm based on analysis done. Change-Id: Iafb7ddb2519c17cf2225da84d6cc74ed985cc21e AMD-Internal: [CPUPL-1352] --- frame/3/bli_l3_sup_int.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/frame/3/bli_l3_sup_int.c b/frame/3/bli_l3_sup_int.c index e54e01d7c7..e1deb32907 100644 --- a/frame/3/bli_l3_sup_int.c +++ b/frame/3/bli_l3_sup_int.c @@ -181,6 +181,13 @@ err_t bli_gemmsup_int if ( mu >= nu ) use_bp = TRUE; else /* if ( mu < nu ) */ use_bp = FALSE; + // In zgemm, mkernel outperforms nkernel for both m > n and n > m. + // mkernel is forced for zgemm. + if(bli_is_dcomplex(dt)) + { + use_bp = TRUE;//mkernel + } + // If the parallel thread factorization was automatic, we update it // with a new factorization based on the matrix dimensions in units // of micropanels.