Skip to content

Commit

Permalink
move data H<->D
Browse files Browse the repository at this point in the history
  • Loading branch information
reger-men committed Jul 5, 2021
1 parent b2932f8 commit 0e44ecd
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 16 deletions.
5 changes: 5 additions & 0 deletions include/backend/hpl_backendHIP.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

#include <stdio.h>
#include <stdlib.h>
#include <map>
#include <string>

extern "C" {
#include "hpl_pmatgen.h"
Expand Down Expand Up @@ -118,8 +120,11 @@ namespace HIP {
void atcpy(const int, const int, const double *, const int,
double *, const int);

void move_data(double *, const double *, const size_t, const int);

// BLAS members
namespace {
rocblas_handle _handle;
std::map<int, const char*> _memcpyKind;
}
}
8 changes: 8 additions & 0 deletions include/backend/hpl_backendWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ extern "C" {
#include "hpl_panel.h"

enum HPL_TARGET {T_DEFAULT, T_CPU, T_HIP, T_TEMPO};
enum HPL_MOVE_DIRECTION {M_H2H = 0,
M_H2D = 1,
M_D2H = 2,
M_D2D = 3,
M_DEFAULT = 4};


void HPL_BE_init(size_t, enum HPL_TARGET);

Expand Down Expand Up @@ -76,6 +82,8 @@ void HPL_BE_dlacpy(const int, const int, const double *, const int, double *, co
void HPL_BE_dlatcpy(const int, const int, const double *, const int,
double *, const int, enum HPL_TARGET);

void HPL_BE_move_data(double *, const double *, const size_t, enum HPL_MOVE_DIRECTION, enum HPL_TARGET);

#ifdef __cplusplus
}
#endif
4 changes: 3 additions & 1 deletion src/pfact/HPL_dlocmax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,9 @@ void HPL_dlocmax
myrow = PANEL->grid->myrow;
nprow = PANEL->grid->nprow;
nb = PANEL->nb;
kk = PANEL->ii + II + ( ilindx = HPL_idamax( N, A, 1 ) );
//Adil
kk = PANEL->ii + II + ( ilindx = HPL_BE_idamax(N, A, 1, T_DEFAULT));
/*kk = PANEL->ii + II + ( ilindx = HPL_idamax( N, A, 1 ) );*/
Mindxl2g( igindx, kk, nb, nb, myrow, 0, nprow );
/*
* WORK[0] := local maximum absolute value scalar,
Expand Down
57 changes: 54 additions & 3 deletions testing/backend/HPL_backendHIP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ void HIP::init(size_t num_gpus)
GPUInfo("%-25s %-20lld", "[SharedMem]", "Shared Memory Per Block", (unsigned long long int)hipDeviceProp.sharedMemPerBlock);
GPUInfo("%-25s %-20d", "[Regs]", "Registers Per Block", hipDeviceProp.regsPerBlock);
GPUInfo("%-25s %-20d", "[WarpSize]", "WaveFront Size", hipDeviceProp.warpSize);
GPUInfo("%-25s %-20d", "[MaxThreads]", "Max Threads Per Block", hipDeviceProp.warpSize);
GPUInfo("%-25s %-20d", "[MaxThreads]", "Max Threads Per Block", hipDeviceProp.maxThreadsPerBlock);
GPUInfo("%-25s %-4d %-4d %-4d", "[MaxThreadsDim]", "Max Threads Dimension", hipDeviceProp.maxThreadsDim[0], hipDeviceProp.maxThreadsDim[1], hipDeviceProp.maxThreadsDim[2]);
GPUInfo("%-25s %-4d %-4d %-4d", "[MaxGridSize]", "Max Grid Size", hipDeviceProp.maxGridSize[0], hipDeviceProp.maxGridSize[1], hipDeviceProp.maxGridSize[2]);
GPUInfo("%-25s %-20lld", "[ConstMem]", "Total Constant Memory", (unsigned long long int)hipDeviceProp.totalConstMem);
Expand All @@ -38,6 +38,12 @@ void HIP::init(size_t num_gpus)
//Init ROCBlas
rocblas_initialize();
ROCBLAS_CHECK_STATUS(rocblas_create_handle(&_handle));

_memcpyKind[0] = "H2H";
_memcpyKind[1] = "H2D";
_memcpyKind[2] = "D2H";
_memcpyKind[3] = "D2D";
_memcpyKind[4] = "DEFAULT";
}

void HIP::release()
Expand Down Expand Up @@ -106,7 +112,7 @@ void HIP::matgen(const HPL_T_grid *GRID, const int M, const int N,
//TODO: generate numbers in this range (-0.5, 0.5]
ROCRAND_CHECK_STATUS(rocrand_generate_normal_double(generator, A, mp*nq, 0, 0.1));
ROCRAND_CHECK_STATUS(rocrand_destroy_generator(generator));
//gPrintMat(M,N,LDA,A);
//gPrintMat(5,5,LDA,A);
}

int HIP::idamax(const int N, const double *DX, const int INCX)
Expand Down Expand Up @@ -150,9 +156,26 @@ void HIP::trsm( const enum HPL_ORDER ORDER, const enum HPL_SIDE SIDE,
const double ALPHA, const double *A, const int LDA, double *B, const int LDB)
{
GPUInfo("%-25s %-8d%-8d \t%-5s", "[TRSM]", "With B of (R:C)", M, N, "HIP");
#if 0
//rocBLAS uses column-major storage for 2D arrays
ROCBLAS_CHECK_STATUS(rocblas_dtrsm(_handle, (rocblas_side)SIDE, (rocblas_fill)UPLO, (rocblas_operation)TRANSA,
(rocblas_diagonal)DIAG, M, N, &ALPHA, A, LDA, B, LDB));
#else
double * d_A, * d_B;
HIP::malloc((void**)&d_A, LDA*M*sizeof(double));
HIP::malloc((void**)&d_B, LDB*N*sizeof(double));

HIP::move_data(d_A, A, LDA*M*sizeof(double), 1);
HIP::move_data(d_B, B, LDB*N*sizeof(double), 1);

ROCBLAS_CHECK_STATUS(rocblas_dtrsm(_handle, (rocblas_side)SIDE, (rocblas_fill)UPLO, (rocblas_operation)TRANSA,
(rocblas_diagonal)DIAG, M, N, &ALPHA, d_A, LDA, d_B, LDB));

HIP::move_data(B, d_B, LDB*N*sizeof(double), 2);

HIP::free((void**)&d_A);
HIP::free((void**)&d_B);
#endif
}

void HIP::trsv(const enum HPL_ORDER ORDER, const enum HPL_UPLO UPLO,
Expand All @@ -172,10 +195,32 @@ void HIP::dgemm(const enum HPL_ORDER ORDER, const enum HPL_TRANS TRANSA,
const double *B, const int LDB, const double BETA, double *C,
const int LDC)
{
GPUInfo("%-25s %-8d%-8d \t%-5s", "[DGEMM]", "With C of (R:C)", M, N, "HIP");
GPUInfo("%-25s %-8d%-8d \t%-5s", "[DGEMM]", "With C of (R:C)", LDC, N, "HIP");
#if 0
//rocBLAS uses column-major storage for 2D arrays
ROCBLAS_CHECK_STATUS(rocblas_dgemm(_handle, (rocblas_operation)TRANSA, (rocblas_operation)TRANSB,
M, N, K, &ALPHA, A, LDA, B, LDB, &BETA, C, LDC));
#else
double * d_A, * d_B, * d_C;
HIP::malloc((void**)&d_A, LDA*K*sizeof(double));
HIP::malloc((void**)&d_B, LDB*N*sizeof(double));
HIP::malloc((void**)&d_C, LDC*N*sizeof(double));

HIP::move_data(d_A, A, LDA*K*sizeof(double), 1);
HIP::move_data(d_B, B, LDB*N*sizeof(double), 1);
HIP::move_data(d_C, C, LDC*N*sizeof(double), 1);

ROCBLAS_CHECK_STATUS(rocblas_dgemm(_handle, (rocblas_operation)TRANSA, (rocblas_operation)TRANSB,
M, N, K, &ALPHA, d_A, LDA, d_B, LDB, &BETA, d_C, LDC));

HIP::move_data(C, d_C, LDC*N*sizeof(double), 2);

HIP::free((void**)&d_A);
HIP::free((void**)&d_B);
HIP::free((void**)&d_C);
#endif

hipDeviceSynchronize();
}

void HIP::dgemv(const enum HPL_ORDER ORDER, const enum HPL_TRANS TRANS, const int M, const int N,
Expand Down Expand Up @@ -240,3 +285,9 @@ void HIP::atcpy(const int M, const int N, const double *A, const int LDA,
_dlatcpy<<<grid_size, block_size, 0, 0>>>(M, N, A, LDA, B, LDB);
}

void HIP::move_data(double *DST, const double *SRC, const size_t SIZE, const int KIND)
{
char title[25] = "[MOVE_"; strcat(title,_memcpyKind[KIND]); strcat(title,"]");
GPUInfo("%-25s %-12ld (B) \t%-5s", title, "Memory of size", SIZE, "HIP");
HIP_CHECK_ERROR(hipMemcpy(DST, SRC, SIZE, (hipMemcpyKind)KIND));
}
15 changes: 15 additions & 0 deletions testing/backend/HPL_backendWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,4 +335,19 @@ extern "C" {
HPL::dispatch(CPU::atcpy, M, N, A, LDA, B, LDB);
}
}

void HPL_BE_move_data(double *DST, const double *SRC, const size_t SIZE,
enum HPL_MOVE_DIRECTION KIND, enum HPL_TARGET TR)
{
switch(TR) {
case T_CPU :
DO_NOTHING();
break;
case T_HIP:
HPL::dispatch(HIP::move_data, DST, SRC, SIZE, (int)KIND);
break;
default:
DO_NOTHING();
}
}
} //extern "C"
42 changes: 30 additions & 12 deletions testing/ptest/HPL_pdtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,13 @@ void HPL_pdtest
/*
* Allocate dynamic memory
*/
//Adil
HPL_BE_malloc((void**)&vptr, ((size_t)(ALGO->align) + (size_t)(mat.ld+1) * (size_t)(mat.nq) ) * sizeof(double), T_TEMPO);
/*vptr = (void*)malloc( ( (size_t)(ALGO->align) +
(size_t)(mat.ld+1) * (size_t)(mat.nq) ) *
sizeof(double) );
*/
//Adil: temp: generate mat on CPU and move it to the CPU. FIXME: Generate the correct Matrix.
size_t bytes = ((size_t)(ALGO->align) + (size_t)(mat.ld+1) * (size_t)(mat.nq) ) * sizeof(double);

void * d_vptr = NULL;
HPL_BE_malloc((void**)&d_vptr, bytes, T_HIP);
vptr = (void*)malloc(bytes);

info[0] = (vptr == NULL); info[1] = myrow; info[2] = mycol;
(void) HPL_all_reduce( (void *)(info), 3, HPL_INT, HPL_max,
GRID->all_comm );
Expand All @@ -179,7 +180,7 @@ void HPL_pdtest
(TEST->kskip)++;
/* some processes might have succeeded with allocation */
//Adil
if( vptr ) HPL_BE_free((void**)&vptr, T_TEMPO);
if( vptr ) HPL_BE_free((void**)&vptr, T_DEFAULT);
/*if (vptr) free(vptr);*/
return;
}
Expand All @@ -189,8 +190,25 @@ void HPL_pdtest
mat.A = (double *)HPL_PTR( vptr, ((size_t)(ALGO->align) * sizeof(double) ) );
mat.X = Mptr( mat.A, 0, mat.nq, mat.ld );
//Adil
HPL_BE_dmatgen(GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED, T_TEMPO);
HPL_BE_dmatgen(GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED, T_CPU);
//HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED );

#if 0
mat.d_A = (double *)HPL_PTR( d_vptr, ((size_t)(ALGO->align) * sizeof(double) ) );
mat.d_X = Mptr( mat.d_A, 0, mat.nq, mat.ld );
HPL_BE_move_data(mat.d_A, mat.A, (N*(N+1)) * sizeof(double), M_H2D, T_HIP);
/*{
// Last row is the vector b
for(int y=0;y<6; y++){
for(int x=0;x<4; x++){
int index = x+y*mat.ld;
printf("%-4d:%-8lf\t", index, mat.d_A[index]);
}
printf("\n");
}
}*/
#endif

#ifdef HPL_CALL_VSIPL
mat.block = vsip_blockbind_d( (vsip_scalar_d *)(mat.A),
(vsip_length)(mat.ld * mat.nq),
Expand Down Expand Up @@ -331,7 +349,7 @@ void HPL_pdtest
(TEST->kpass)++;

//Adil
if( vptr ) HPL_BE_free((void**)&vptr, T_TEMPO);
if( vptr ) HPL_BE_free((void**)&vptr, T_DEFAULT);
/*if( vptr ) free( vptr ); */
return;
}
Expand All @@ -353,7 +371,7 @@ void HPL_pdtest
* and norm inf of b - A x. Display residual checks.
*/
//Adil
HPL_BE_dmatgen(GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED, T_TEMPO);
HPL_BE_dmatgen(GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED, T_DEFAULT);
/*HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED );*/
Anorm1 = HPL_pdlange( GRID, HPL_NORM_1, N, N, NB, mat.A, mat.ld );
AnormI = HPL_pdlange( GRID, HPL_NORM_I, N, N, NB, mat.A, mat.ld );
Expand Down Expand Up @@ -391,7 +409,7 @@ void HPL_pdtest
{
//Adil
HPL_BE_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone,
mat.A, mat.ld, mat.X, 1, HPL_rone, Bptr, 1, T_TEMPO);
mat.A, mat.ld, mat.X, 1, HPL_rone, Bptr, 1, T_DEFAULT);
/*HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone,
mat.A, mat.ld, mat.X, 1, HPL_rone, Bptr, 1 );*/
}
Expand Down Expand Up @@ -455,7 +473,7 @@ void HPL_pdtest
}
}
//Adil
if( vptr ) HPL_BE_free((void**)&vptr, T_TEMPO);
if( vptr ) HPL_BE_free((void**)&vptr, T_DEFAULT);
//if( vptr ) free( vptr );
/*
* End of HPL_pdtest
Expand Down

0 comments on commit 0e44ecd

Please sign in to comment.