From f8bf1bd41e34ce8ee2aa690486d9a6799667d6df Mon Sep 17 00:00:00 2001 From: Edward Hutter Date: Tue, 19 Sep 2023 06:42:52 -0500 Subject: [PATCH 1/2] Use PMPI when appropriate --- src/interface/common.cxx | 16 ++++++++-------- src/shared/model.cxx | 20 ++++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/interface/common.cxx b/src/interface/common.cxx index df88ba85..65e09eaf 100644 --- a/src/interface/common.cxx +++ b/src/interface/common.cxx @@ -381,7 +381,7 @@ namespace CTF_int { void CommData::bcast(void * buf, int64_t count, MPI_Datatype mdtype, int root){ #ifdef TUNE - MPI_Barrier(cm); + PMPI_Barrier(cm); int tsize_; MPI_Type_size(mdtype, &tsize_); @@ -394,7 +394,7 @@ namespace CTF_int { #endif MPI_Bcast(buf, count, mdtype, root, cm); #ifdef TUNE - MPI_Barrier(cm); + PMPI_Barrier(cm); double exe_time = MPI_Wtime()-st_time; int tsize; MPI_Type_size(mdtype, &tsize); @@ -405,7 +405,7 @@ namespace CTF_int { void CommData::allred(void * inbuf, void * outbuf, int64_t count, MPI_Datatype mdtype, MPI_Op op){ #ifdef TUNE - MPI_Barrier(cm); + PMPI_Barrier(cm); #endif #ifdef TUNE @@ -423,7 +423,7 @@ namespace CTF_int { double st_time = MPI_Wtime(); MPI_Allreduce(inbuf, outbuf, count, mdtype, op, cm); #ifdef TUNE - MPI_Barrier(cm); + PMPI_Barrier(cm); #endif double exe_time = MPI_Wtime()-st_time; int tsize; @@ -437,7 +437,7 @@ namespace CTF_int { void CommData::red(void * inbuf, void * outbuf, int64_t count, MPI_Datatype mdtype, MPI_Op op, int root){ #ifdef TUNE - MPI_Barrier(cm); + PMPI_Barrier(cm); // change-of-observe int tsize_; @@ -454,7 +454,7 @@ namespace CTF_int { double st_time = MPI_Wtime(); MPI_Reduce(inbuf, outbuf, count, mdtype, op, root, cm); #ifdef TUNE - MPI_Barrier(cm); + PMPI_Barrier(cm); #endif double exe_time = MPI_Wtime()-st_time; int tsize; @@ -476,7 +476,7 @@ namespace CTF_int { int64_t const * recv_displs){ #ifdef TUNE - MPI_Barrier(cm); + PMPI_Barrier(cm); // change-of-observe int64_t tot_sz_ = std::max(send_displs[np-1]+send_counts[np-1], recv_displs[np-1]+recv_counts[np-1])*datum_size; double tps_[] = {0.0, 1.0, log2(np), (double)tot_sz_}; @@ -568,7 +568,7 @@ namespace CTF_int { CTF_int::cdealloc(i32_recv_displs); } #ifdef TUNE - MPI_Barrier(cm); + PMPI_Barrier(cm); #endif double exe_time = MPI_Wtime()-st_time; int64_t tot_sz = std::max(send_displs[np-1]+send_counts[np-1], recv_displs[np-1]+recv_counts[np-1])*datum_size; diff --git a/src/shared/model.cxx b/src/shared/model.cxx index ef17ebf4..aec9008e 100644 --- a/src/shared/model.cxx +++ b/src/shared/model.cxx @@ -255,7 +255,7 @@ namespace CTF_int { int tot_nrcol; //compute the total number of observations over all processors - MPI_Allreduce(&nrcol, &tot_nrcol, 1, MPI_INT, MPI_SUM, cm); + PMPI_Allreduce(&nrcol, &tot_nrcol, 1, MPI_INT, MPI_SUM, cm); //if there has been more than 16*nparam observations per processor, tune the model if (tot_nrcol >= 16.*np*nparam){ @@ -315,7 +315,7 @@ namespace CTF_int { i_st = nparam; } //find the max execution time over all processors - // MPI_Allreduce(MPI_IN_PLACE, &max_time, 1, MPI_DOUBLE, MPI_MAX, cm); + // PMPI_Allreduce(MPI_IN_PLACE, &max_time, 1, MPI_DOUBLE, MPI_MAX, cm); //double chunk = max_time / 1000.; //printf("%s chunk = %+1.2e\n",name,chunk); @@ -406,7 +406,7 @@ namespace CTF_int { } int sub_np = np; //std::min(np,32); MPI_Comm sub_comm; - MPI_Comm_split(cm, rk Date: Tue, 19 Sep 2023 10:25:59 -0500 Subject: [PATCH 2/2] Changed remaining MPI_Barrier to PMPI_Barrier --- src/redistribution/dgtog_redist_ror.h | 4 ++-- src/redistribution/redist.cxx | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/redistribution/dgtog_redist_ror.h b/src/redistribution/dgtog_redist_ror.h index 031b6885..99dca2da 100644 --- a/src/redistribution/dgtog_redist_ror.h +++ b/src/redistribution/dgtog_redist_ror.h @@ -326,7 +326,7 @@ void dgtog_reshuffle(int const * sym, return; } #ifdef TUNE - MPI_Barrier(ord_glb_comm.cm); + PMPI_Barrier(ord_glb_comm.cm); #endif TAU_FSTART(dgtog_reshuffle); double st_time = MPI_Wtime(); @@ -709,7 +709,7 @@ void dgtog_reshuffle(int const * sym, sr->dealloc(tsr_data); #endif #ifdef TUNE - MPI_Barrier(ord_glb_comm.cm); + PMPI_Barrier(ord_glb_comm.cm); #endif double exe_time = MPI_Wtime()-st_time; double tps[] = {exe_time, 1.0, (double)log2(ord_glb_comm.np), (double)std::max(old_dist.size, new_dist.size)*log2(ord_glb_comm.np)*sr->el_size}; diff --git a/src/redistribution/redist.cxx b/src/redistribution/redist.cxx index 2773e099..0946f2d7 100644 --- a/src/redistribution/redist.cxx +++ b/src/redistribution/redist.cxx @@ -481,7 +481,7 @@ namespace CTF_int { TAU_FSTART(block_reshuffle); #ifdef TUNE - MPI_Barrier(glb_comm.cm); + PMPI_Barrier(glb_comm.cm); double st_time = MPI_Wtime(); #endif @@ -602,7 +602,7 @@ namespace CTF_int { cdealloc(reqs); #ifdef TUNE - MPI_Barrier(glb_comm.cm); + PMPI_Barrier(glb_comm.cm); double exe_time = MPI_Wtime()-st_time; tps = (double*)malloc(3*sizeof(double)); tps[0] = exe_time;