Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use PMPI when appropriate #153

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions src/interface/common.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ namespace CTF_int {

void CommData::bcast(void * buf, int64_t count, MPI_Datatype mdtype, int root){
#ifdef TUNE
MPI_Barrier(cm);
PMPI_Barrier(cm);

int tsize_;
MPI_Type_size(mdtype, &tsize_);
Expand All @@ -394,7 +394,7 @@ namespace CTF_int {
#endif
MPI_Bcast(buf, count, mdtype, root, cm);
#ifdef TUNE
MPI_Barrier(cm);
PMPI_Barrier(cm);
double exe_time = MPI_Wtime()-st_time;
int tsize;
MPI_Type_size(mdtype, &tsize);
Expand All @@ -405,7 +405,7 @@ namespace CTF_int {

void CommData::allred(void * inbuf, void * outbuf, int64_t count, MPI_Datatype mdtype, MPI_Op op){
#ifdef TUNE
MPI_Barrier(cm);
PMPI_Barrier(cm);
#endif

#ifdef TUNE
Expand All @@ -423,7 +423,7 @@ namespace CTF_int {
double st_time = MPI_Wtime();
MPI_Allreduce(inbuf, outbuf, count, mdtype, op, cm);
#ifdef TUNE
MPI_Barrier(cm);
PMPI_Barrier(cm);
#endif
double exe_time = MPI_Wtime()-st_time;
int tsize;
Expand All @@ -437,7 +437,7 @@ namespace CTF_int {

void CommData::red(void * inbuf, void * outbuf, int64_t count, MPI_Datatype mdtype, MPI_Op op, int root){
#ifdef TUNE
MPI_Barrier(cm);
PMPI_Barrier(cm);

// change-of-observe
int tsize_;
Expand All @@ -454,7 +454,7 @@ namespace CTF_int {
double st_time = MPI_Wtime();
MPI_Reduce(inbuf, outbuf, count, mdtype, op, root, cm);
#ifdef TUNE
MPI_Barrier(cm);
PMPI_Barrier(cm);
#endif
double exe_time = MPI_Wtime()-st_time;
int tsize;
Expand All @@ -476,7 +476,7 @@ namespace CTF_int {
int64_t const * recv_displs){

#ifdef TUNE
MPI_Barrier(cm);
PMPI_Barrier(cm);
// change-of-observe
int64_t tot_sz_ = std::max(send_displs[np-1]+send_counts[np-1], recv_displs[np-1]+recv_counts[np-1])*datum_size;
double tps_[] = {0.0, 1.0, log2(np), (double)tot_sz_};
Expand Down Expand Up @@ -568,7 +568,7 @@ namespace CTF_int {
CTF_int::cdealloc(i32_recv_displs);
}
#ifdef TUNE
MPI_Barrier(cm);
PMPI_Barrier(cm);
#endif
double exe_time = MPI_Wtime()-st_time;
int64_t tot_sz = std::max(send_displs[np-1]+send_counts[np-1], recv_displs[np-1]+recv_counts[np-1])*datum_size;
Expand Down
4 changes: 2 additions & 2 deletions src/redistribution/dgtog_redist_ror.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ void dgtog_reshuffle(int const * sym,
return;
}
#ifdef TUNE
MPI_Barrier(ord_glb_comm.cm);
PMPI_Barrier(ord_glb_comm.cm);
#endif
TAU_FSTART(dgtog_reshuffle);
double st_time = MPI_Wtime();
Expand Down Expand Up @@ -709,7 +709,7 @@ void dgtog_reshuffle(int const * sym,
sr->dealloc(tsr_data);
#endif
#ifdef TUNE
MPI_Barrier(ord_glb_comm.cm);
PMPI_Barrier(ord_glb_comm.cm);
#endif
double exe_time = MPI_Wtime()-st_time;
double tps[] = {exe_time, 1.0, (double)log2(ord_glb_comm.np), (double)std::max(old_dist.size, new_dist.size)*log2(ord_glb_comm.np)*sr->el_size};
Expand Down
4 changes: 2 additions & 2 deletions src/redistribution/redist.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ namespace CTF_int {

TAU_FSTART(block_reshuffle);
#ifdef TUNE
MPI_Barrier(glb_comm.cm);
PMPI_Barrier(glb_comm.cm);
double st_time = MPI_Wtime();
#endif

Expand Down Expand Up @@ -602,7 +602,7 @@ namespace CTF_int {
cdealloc(reqs);

#ifdef TUNE
MPI_Barrier(glb_comm.cm);
PMPI_Barrier(glb_comm.cm);
double exe_time = MPI_Wtime()-st_time;
tps = (double*)malloc(3*sizeof(double));
tps[0] = exe_time;
Expand Down
20 changes: 10 additions & 10 deletions src/shared/model.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ namespace CTF_int {
int tot_nrcol;

//compute the total number of observations over all processors
MPI_Allreduce(&nrcol, &tot_nrcol, 1, MPI_INT, MPI_SUM, cm);
PMPI_Allreduce(&nrcol, &tot_nrcol, 1, MPI_INT, MPI_SUM, cm);

//if there has been more than 16*nparam observations per processor, tune the model
if (tot_nrcol >= 16.*np*nparam){
Expand Down Expand Up @@ -315,7 +315,7 @@ namespace CTF_int {
i_st = nparam;
}
//find the max execution time over all processors
// MPI_Allreduce(MPI_IN_PLACE, &max_time, 1, MPI_DOUBLE, MPI_MAX, cm);
// PMPI_Allreduce(MPI_IN_PLACE, &max_time, 1, MPI_DOUBLE, MPI_MAX, cm);
//double chunk = max_time / 1000.;
//printf("%s chunk = %+1.2e\n",name,chunk);

Expand Down Expand Up @@ -406,7 +406,7 @@ namespace CTF_int {
}
int sub_np = np; //std::min(np,32);
MPI_Comm sub_comm;
MPI_Comm_split(cm, rk<sub_np, rk, &sub_comm);
PMPI_Comm_split(cm, rk<sub_np, rk, &sub_comm);
//use only data from the first 32 processors, so that this doesn't take too long
//FIXME: can be smarter but not clear if necessary
if (rk < sub_np){
Expand All @@ -415,13 +415,13 @@ namespace CTF_int {
//all_b will have the bs from each processor vertically stacked as [b_1^T .. b_32^T]^T
double * all_b = (double*)malloc(sizeof(double)*nparam*sub_np);
//gather all Rs from all the processors
MPI_Allgather(R, nparam*nparam, MPI_DOUBLE, all_R, nparam*nparam, MPI_DOUBLE, sub_comm);
PMPI_Allgather(R, nparam*nparam, MPI_DOUBLE, all_R, nparam*nparam, MPI_DOUBLE, sub_comm);
double * Rs = (double*)malloc(sizeof(double)*nparam*nparam*sub_np);
for (int i=0; i<sub_np; i++){
lda_cpy(sizeof(double), nparam, nparam, nparam, sub_np*nparam, (const char *)(all_R+i*nparam*nparam), (char*)(Rs+i*nparam));
}
//gather all bs from all the processors
MPI_Allgather(b, nparam, MPI_DOUBLE, all_b, nparam, MPI_DOUBLE, sub_comm);
PMPI_Allgather(b, nparam, MPI_DOUBLE, all_b, nparam, MPI_DOUBLE, sub_comm);
free(b);
free(all_R);
free(R);
Expand Down Expand Up @@ -485,7 +485,7 @@ namespace CTF_int {
}
MPI_Comm_free(&sub_comm);
//broadcast new coefficient guess
MPI_Bcast(coeff_guess, nparam, MPI_DOUBLE, 0, cm);
PMPI_Bcast(coeff_guess, nparam, MPI_DOUBLE, 0, cm);
/*for (int i=0; i<nparam; i++){
regularization[i] = coeff_guess[i]*REG_LAMBDA;
}*/
Expand All @@ -497,9 +497,9 @@ namespace CTF_int {
double tot_time_total;
double over_time_total;
double under_time_total;
MPI_Allreduce(&tot_time, &tot_time_total, 1, MPI_DOUBLE, MPI_SUM, cm);
MPI_Allreduce(&over_time, &over_time_total, 1, MPI_DOUBLE, MPI_SUM, cm);
MPI_Allreduce(&under_time, &under_time_total, 1, MPI_DOUBLE, MPI_SUM, cm);
PMPI_Allreduce(&tot_time, &tot_time_total, 1, MPI_DOUBLE, MPI_SUM, cm);
PMPI_Allreduce(&over_time, &over_time_total, 1, MPI_DOUBLE, MPI_SUM, cm);
PMPI_Allreduce(&under_time, &under_time_total, 1, MPI_DOUBLE, MPI_SUM, cm);


// NOTE: to change the minimum number of observations and the threshold,
Expand Down Expand Up @@ -729,7 +729,7 @@ namespace CTF_int {
ofs.close();
}
rank++;
MPI_Barrier(MPI_COMM_WORLD);
PMPI_Barrier(MPI_COMM_WORLD);
}
}

Expand Down
Loading