Skip to content

Commit

Permalink
tune some parameters for better performance
Browse files (browse the repository at this point in the history)
  • Loading branch information
binbin committed Apr 25, 2022
1 parent bfbd57a commit 016d4e9
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
8 changes: 4 additions & 4 deletions src/pgesv/HPL_pdgesvK2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,23 +209,23 @@ void HPL_pdgesvK2
// overlap row swap2 with update1
HPL_BE_stream_wait_event(HPL_COMPUTESTREAM, HPL_RS_1, T_HIP);
(void) HPL_pdupdate( NULL, NULL, panel[k], nn );
- HPL_BE_pdlaswp(panel[0], nn * 3, T_HIP);
+ HPL_BE_pdlaswp(panel[0], nn * 4, T_HIP);
HPL_BE_event_record(HPL_RS_2, T_HIP);

}
HPL_BE_device_sync(T_HIP);

// split update and row swap step
// overlap row swap3 with update2
- HPL_pdupdate( NULL, NULL, panel[0], nn * 3 );
+ HPL_pdupdate( NULL, NULL, panel[0], nn * 4 );

- HPL_BE_pdlaswp(panel[0], nq - (nn * 3), T_HIP);
+ HPL_BE_pdlaswp(panel[0], nq - (nn * 4), T_HIP);
HPL_BE_event_record(HPL_RS_3, T_HIP);

//overlap row swap1 of next iteration with update3
HPL_BE_stream_wait_event(HPL_COMPUTESTREAM, HPL_RS_3, T_HIP);

- HPL_pdupdate( NULL, NULL, panel[0], nq - (nn * 3) );
+ HPL_pdupdate( NULL, NULL, panel[0], nq - (nn * 4) );

// overlap update with data copy

Expand Down
6 changes: 4 additions & 2 deletions testing/backend/HPL_backendHIP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -728,6 +728,7 @@ void HIP::dgemm(const enum HPL_ORDER ORDER, const enum HPL_TRANS TRANSA,
GPUInfo("%-25s %-8d%-8d%-8d \t%-5s", "[DGEMM]", "With C of (R:C)", M, N, K, "HIP");
#if 1
//rocBLAS uses column-major storage for 2D arrays
+ hipEventRecord(dgemmStart, computeStream);
ROCBLAS_CHECK_STATUS(rocblas_dgemm(_handle, (rocblas_operation)TRANSA, (rocblas_operation)TRANSB,
M, N, K, &ALPHA, A, LDA, B, LDB, &BETA, C, LDC));
#else
Expand Down Expand Up @@ -861,8 +862,8 @@ __global__ void _dlaswp00N(const int N, const int M,
const int* __restrict__ IPIV) {
// const int* IPIV) {

- __shared__ double s_An_init[2048];
- __shared__ double s_An_ipiv[2048];
+ __shared__ double s_An_init[512];
+ __shared__ double s_An_ipiv[512];

const int m = threadIdx.x;
const int n = blockIdx.x;
Expand Down Expand Up @@ -934,6 +935,7 @@ void HIP::pdlaswp(HPL_T_panel *PANEL, const int NN){
mp = PANEL->mp - jb; nq0 = 0; nn = n - nq0;

const int block_size = 512, grid_size = nn;
+ hipStreamWaitEvent(pdlaswpStream, dgemmStart, 0);
hipLaunchKernelGGL(_dlaswp00N, dim3(grid_size), dim3(block_size), 0, pdlaswpStream,
nn, jb, Aptr, lda, ipiv);
}

0 comments on commit 016d4e9

Please sign in to comment.