Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/airmler/ctf
Browse files Browse the repository at this point in the history
  • Loading branch information
airmler committed Mar 16, 2022
2 parents d80aab8 + 88c1013 commit 59c7ffb
Show file tree
Hide file tree
Showing 6 changed files with 264 additions and 2 deletions.
13 changes: 12 additions & 1 deletion src/contraction/contraction.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -4485,6 +4485,12 @@ namespace CTF_int {
#endif
TAU_FSTART(ctr_func);
/* Invoke the contraction algorithm */
TAU_FSTART(blockComm);
std::vector<int> swap;
ctrf->blockComm( A->topo->lens, A->data, B->data, C->data
, A->size, B->size, C->size, global_comm, swap);
MPI_Barrier(global_comm.cm);
TAU_FSTOP(blockComm);
A->topo->activate();

#ifdef PROFILE_MEMORY
Expand Down Expand Up @@ -4569,7 +4575,12 @@ namespace CTF_int {
#endif


A->topo->deactivate();
// A->topo->deactivate();
TAU_FSTART(blockComm);
MPI_Barrier(global_comm.cm);
ctrf->blockComm( A->topo->lens, A->data, B->data, C->data
, A->size, B->size, C->size, global_comm, swap);
TAU_FSTOP(blockComm);

#ifdef PROFILE
TAU_FSTART(post_ctr_func_barrier);
Expand Down
108 changes: 108 additions & 0 deletions src/contraction/ctr_2d_general.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -472,5 +472,113 @@ namespace CTF_int {
}
TAU_FSTOP(ctr_2d_general);
}

void ctr_2d_general::blockComm( int const * rgrid, char *A, char *B, char *C
, size_t sizeA, size_t sizeB, size_t sizeC
, CommData glb_comm, std::vector<int> &swap
){
int rank = glb_comm.rank;
int np = glb_comm.np;
int src, dst;
// we have to determine the partners
if (! swap.size() ) {
ipair nr(getNumNodes(glb_comm.cm));
// rGrid is the rankGrid of the given tensor topology
CommGrid grid({rgrid[0], rgrid[1]}, nr.first);
ipair nGrid = grid.nGrid;
ipair iGrid = grid.iGrid;
// rr is the key/color pair for the original rank distribution of dim_comm[0]
std::vector<ipair> rr(np);
std::vector< std::pair<ipair, int> > perm(np);
for (int r(0); r < np; r++) rr[r] = { r % rgrid[0], r / rgrid[0] };
// the desired distribution are nGrid[0] x nGrid[1] blocks with the some color
for (int r(0); r < np; r++){
// the color is the jth column and kth row in the nodeGrid
int clr = (rr[r].second/iGrid.second)*nGrid.first + rr[r].first/iGrid.first;
int key = (rr[r].second%iGrid.second)*iGrid.first + rr[r].first%iGrid.first;
// we have to swap color and key that we can use std::sort
perm[r] = { { clr, key }, r};
}
std::sort(perm.begin(), perm.end());
for (auto p: perm) swap.push_back(p.second);

src = swap[rank];
auto it( std::find(swap.begin(), swap.end(), rank) );
dst = std::distance(swap.begin(), it);
}
else {
dst = swap[rank];
auto it( std::find(swap.begin(), swap.end(), rank) );
src = std::distance(swap.begin(), it);
}

MPI_Barrier(glb_comm.cm);
MPI_Status s;
MPI_Sendrecv_replace(&cdt_A->color, 1, MPI_INT, dst, 0, src, 0, glb_comm.cm, &s);
MPI_Sendrecv_replace(&cdt_B->color, 1, MPI_INT, dst, 0, src, 0, glb_comm.cm, &s);
MPI_Sendrecv_replace(&cdt_A->rank, 1, MPI_INT, dst, 0, src, 0, glb_comm.cm, &s);
MPI_Sendrecv_replace(&cdt_B->rank, 1, MPI_INT, dst, 0, src, 0, glb_comm.cm, &s);

MPI_Barrier(glb_comm.cm);

size_t el(std::max(sizeA, sizeB));
el = std::max(el, sizeC);
char *buf = new char[el*sr_A->el_size];
// Do the A job
MPI_Request sreq, rreq;
MPI_Irecv(buf, sizeA, sr_A->mdtype(), src, 0, glb_comm.cm, &rreq);
MPI_Isend(A, sizeA, sr_A->mdtype(), dst, 0, glb_comm.cm, &sreq);
MPI_Wait(&rreq, MPI_STATUS_IGNORE);
MPI_Wait(&sreq, MPI_STATUS_IGNORE);
memcpy(A, buf, sizeA*sr_A->el_size);

// Do the B job
MPI_Irecv(buf, sizeB, sr_A->mdtype(), src, 0, glb_comm.cm, &rreq);
MPI_Isend(B, sizeB, sr_A->mdtype(), dst, 0, glb_comm.cm, &sreq);
MPI_Wait(&rreq, MPI_STATUS_IGNORE);
MPI_Wait(&sreq, MPI_STATUS_IGNORE);
memcpy(B, buf, sizeB*sr_A->el_size);

// Do the B job
MPI_Irecv(buf, sizeC, sr_A->mdtype(), src, 0, glb_comm.cm, &rreq);
MPI_Isend(C, sizeC, sr_A->mdtype(), dst, 0, glb_comm.cm, &sreq);
MPI_Wait(&rreq, MPI_STATUS_IGNORE);
MPI_Wait(&sreq, MPI_STATUS_IGNORE);
memcpy(C, buf, sizeC*sr_A->el_size);
MPI_Barrier(glb_comm.cm);
}

/**
 * \brief counts the distinct processor names among the ranks of comm
 *
 * Every rank contributes the name returned by MPI_Get_processor_name; the
 * names are gathered on all ranks, sorted and de-duplicated.
 *
 * \param[in] comm communicator whose ranks are inspected
 * \return pair (number of distinct processor names, np / that number), where
 *         np is the size of comm; the second entry is an integer division and
 *         therefore only meaningful if every node hosts the same number of
 *         ranks
 */
ipair ctr_2d_general::getNumNodes(MPI_Comm comm){
  int rank, np;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &np);

  // name of the processor this rank runs on
  char nodeName[MPI_MAX_PROCESSOR_NAME];
  int nameLength;
  MPI_Get_processor_name(nodeName, &nameLength);

  // every rank learns the name length of every other rank
  std::vector<int> nameLengths(np);
  MPI_Allgather(
    &nameLength, 1, MPI_INT, nameLengths.data(), 1, MPI_INT, comm
  );

  // displacement of each name inside the gathered buffer (off[0] is 0)
  std::vector<int> off(np);
  for (int i(1); i < np; i++) off[i] = off[i-1] + nameLengths[i-1];

  // heap storage instead of a variable length array: VLAs are not standard
  // C++ and the required size grows with the number of ranks
  std::vector<char> nodeNames(static_cast<size_t>(np)*MPI_MAX_PROCESSOR_NAME);
  MPI_Allgatherv(
    nodeName, nameLengths[rank], MPI_BYTE, nodeNames.data(),
    nameLengths.data(), off.data(), MPI_BYTE, comm
  );

  std::vector<std::string> nodeList(np);
  for (int i(0); i < np; i++)
    nodeList[i].assign(nodeNames.data() + off[i], nameLengths[i]);

  // number of unique names == number of nodes
  std::sort(nodeList.begin(), nodeList.end());
  std::vector<std::string>::iterator it(
    std::unique(nodeList.begin(), nodeList.end())
  );
  int nNodes(static_cast<int>(std::distance(nodeList.begin(), it)));
  return ipair(nNodes, np/nNodes);
}

}

15 changes: 15 additions & 0 deletions src/contraction/ctr_2d_general.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ namespace CTF_int{
CommData * cdt_A;
CommData * cdt_B;
CommData * cdt_C;


/* Class to be called on sub-blocks */
ctr * rec_ctr;

Expand All @@ -81,6 +83,19 @@ namespace CTF_int{
* where b is the smallest blocking factor among A and B or A and C or B and C.
*/
void run(char * A, char * B, char * C);
/**
* \brief interchanges processors in the communicator -> permuting
* the data such that each communicator has adjacent global ranks
*
* The buffers A, B and C are overwritten in place with the data received
* from the partner rank. If swap is passed in empty, the permutation is
* computed from rgrid[0] x rgrid[1] and stored in swap; if swap is already
* filled, the same permutation is applied in the opposite direction.
*
* \param[in] rgrid lengths of the rank grid (rgrid[0] and rgrid[1] are read)
* \param[in,out] A local data of A, sizeA elements
* \param[in,out] B local data of B, sizeB elements
* \param[in,out] C local data of C, sizeC elements
* \param[in] sizeA number of elements in A
* \param[in] sizeB number of elements in B
* \param[in] sizeC number of elements in C
* \param[in] globalComm communicator over which the data is exchanged
* \param[in,out] swap permutation of the ranks, filled if empty on entry
*/
void blockComm( int const *rgrid, char *A, char *B, char *C
, size_t sizeA, size_t sizeB, size_t sizeC
, CommData globalComm, std::vector<int> &swap);

/**
* \brief returns the number of nodes & number of ranks per node
* note: only trustworthy if ranks per node is the same for all nodes!!
*
* The number of nodes is the number of distinct names reported by
* MPI_Get_processor_name over all ranks of comm; the number of ranks per
* node is the size of comm divided by that count (integer division).
*
* \param[in] comm communicator whose ranks are inspected
* \return pair (number of nodes, ranks per node)
*/
ipair getNumNodes(MPI_Comm comm);
/**
* \brief returns the number of bytes of buffer space
* we need
Expand Down
4 changes: 3 additions & 1 deletion src/contraction/ctr_comm.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,9 @@ namespace CTF_int{
virtual double est_time_fp(int nlyr) { return 0; };
virtual double est_time_rec(int nlyr) { return est_time_fp(nlyr); };
virtual ctr * clone() { return NULL; };

/**
 * \brief hook for permuting the local tensor data between ranks; this
 *        default implementation does nothing
 *
 * NOTE(review): ctr_2d_general declares a blockComm with the same parameter
 * list, presumably as an override of this function -- confirm against the
 * class hierarchy.
 *
 * \param[in] rgrid lengths of the rank grid
 * \param[in,out] A local data of A
 * \param[in,out] B local data of B
 * \param[in,out] C local data of C
 * \param[in] sizeA number of elements in A
 * \param[in] sizeB number of elements in B
 * \param[in] sizeC number of elements in C
 * \param[in] globalComm communicator over which the data would be exchanged
 * \param[in,out] swap permutation of the ranks
 */
virtual void blockComm( int const *rgrid, char *A, char *B, char *C
, size_t sizeA, size_t sizeB, size_t sizeC
, CommData globalComm, std::vector<int> &swap) {};
/**
* \brief deallocates generic ctr object
*/
Expand Down
107 changes: 107 additions & 0 deletions src/interface/common.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,10 @@ namespace CTF_int {
#ifdef TUNE
double st_time = MPI_Wtime();
#endif
TAU_FSTART(bcast);
MPI_Bcast(buf, count, mdtype, root, cm);
MPI_Barrier(cm);
TAU_FSTOP(bcast);
#ifdef TUNE
MPI_Barrier(cm);
double exe_time = MPI_Wtime()-st_time;
Expand Down Expand Up @@ -571,6 +574,110 @@ namespace CTF_int {
alltoallv_mdl.observe(tps);
}


/**
 * \brief sets up the rank grid, the node grid and the intra-node grid
 * \param[in] _rGrid edge lengths of the 2D rank grid
 * \param[in] _nNodes number of nodes the ranks are spread over
 */
CommGrid::CommGrid(ipair _rGrid, int _nNodes){
  // nRanks has to be known before getNodeGrid is called, since it is read there
  nRanks = _rGrid.first*_rGrid.second;
  colorKey.resize(nRanks);

  nGrid = getNodeGrid(_nNodes, _rGrid);
  rGrid = _rGrid;

  // edge lengths of the part of the rank grid that belongs to a single node
  iGrid = ipair(rGrid.first / nGrid.first, rGrid.second / nGrid.second);

  // intra-node grid times number of nodes has to reproduce the rank count
  assert(colorKey.size() == iGrid.first*iGrid.second*_nNodes);
}

/**
 * \brief distributes the prime factors of the node count over the two edges
 *        of the rank grid
 *
 * Reads the member nRanks, which therefore has to be set before the call.
 *
 * \param[in] nNodes number of nodes
 * \param[in] rGrid edge lengths of the 2D rank grid
 * \return edge lengths of the node grid
 */
ipair CommGrid::getNodeGrid(int nNodes, ipair rGrid){
  ipair nGrid({1, 1});
  std::vector<int> facNodes(CommGrid::factorize(nNodes));
  std::vector<int> facrgf(CommGrid::factorize(rGrid.first));
  std::vector<int> facrgs(CommGrid::factorize(rGrid.second));
  std::vector<int> diff;

  // We are selecting all prime factors of #nodes
  // which do not occur in the prime factors of a grid edge;
  // we remove these factors and assign them to the opposite grid edge

  // factors missing in the first edge go to the second edge of the node grid
  std::set_difference( facNodes.begin(), facNodes.end()
                     , facrgf.begin(), facrgf.end()
                     , std::back_inserter(diff)
                     );
  for (auto d: diff)
    facNodes.erase(std::find(facNodes.begin(), facNodes.end(), d));
  nGrid.second =
    std::accumulate(diff.begin(), diff.end(), 1, std::multiplies<int>());
  diff.clear();

  // factors missing in the second edge go to the first edge of the node grid
  std::set_difference( facNodes.begin(), facNodes.end()
                     , facrgs.begin(), facrgs.end()
                     , std::back_inserter(diff)
                     );
  for (auto d: diff)
    facNodes.erase(std::find(facNodes.begin(), facNodes.end(), d));
  nGrid.first =
    std::accumulate(diff.begin(), diff.end(), 1, std::multiplies<int>());

  // if there is no element left, all prime factors are distributed
  if (facNodes.empty()) return nGrid;

  // Try every split of the remaining prime factors between the two edges;
  // bit k of the candidate id decides where factor k goes (see getSquare).
  // The number of candidates is an integer power of two, computed once.
  const int nCandidates = 1 << facNodes.size();
  double minVal(DBL_MAX);
  // Neutral fallback: if no candidate passes the divisibility filter below,
  // the node grid keeps the factors assigned so far instead of being
  // multiplied by zero (which made the caller divide by zero).
  ipair bestPair(1, 1);
  for (int i(0); i < nCandidates; i++){
    ipair edges(CommGrid::getSquare(i, facNodes));
    // not every candidate yields edges that divide the number of ranks:
    // we allow only those that do
    int first(edges.first*nGrid.first);
    int second(edges.second*nGrid.second);
    if (nRanks % first != 0) continue;
    if (nRanks % second != 0) continue;

    // keep the candidate with the smallest 1/first + 1/second
    double val(1.0/(double)first + 1.0/(double)second);
    if ( minVal > val ){
      minVal = val;
      bestPair = edges;
    }
  }
  nGrid.first *= bestPair.first;
  nGrid.second *= bestPair.second;
  return nGrid;
}

/**
 * \brief splits a number into its prime factors by trial division
 * \param[in] number value to factorize
 * \return prime factors in ascending order, repeated according to their
 *         multiplicity; a number smaller than 4 is returned as its own
 *         single factor
 */
std::vector<int> CommGrid::factorize(int number){
  std::vector<int> primes;
  // everything below 4 is reported unchanged as the only entry
  if (number < 4) {
    primes.push_back(number);
    return primes;
  }
  // divide out every divisor, smallest first, until nothing is left
  int rest(number);
  for (int div(2); rest > 1; div++)
    for (; rest % div == 0; rest /= div)
      primes.push_back(div);
  return primes;
}

/**
 * \brief splits the given factors into two products
 *
 * Bit k of id selects whether factors[k] is multiplied into the first
 * product; all remaining factors end up in the second one.
 *
 * \param[in] id bit mask choosing the factors of the first product
 * \param[in] factors factors to be distributed
 * \return pair (product of the selected factors, product of the others)
 */
ipair CommGrid::getSquare(int id, std::vector<int> factors) {
  ipair result({1,1});
  // product of all factors, in integer arithmetic
  const int total = std::accumulate(
    factors.begin(), factors.end(), 1, std::multiplies<int>()
  );
  // the bit position never runs past the factor list, and the bit itself is
  // built with an integer shift wide enough to exceed any int id
  for (std::size_t pos(0); pos < factors.size(); pos++) {
    const long long bit(1LL << pos);
    if (bit > id) break;
    if (id & bit) result.first *= factors[pos];
  }
  result.second = total / result.first;
  return result;
}



char * get_default_inds(int order, int start_index){
char * inds = (char*)CTF_int::alloc(order*sizeof(char));
for (int i=0; i<order; i++){
Expand Down
19 changes: 19 additions & 0 deletions src/interface/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <iostream>
#include <limits.h>
#include <random>
#include <cfloat>

#include "../shared/model.h"

Expand Down Expand Up @@ -241,6 +242,24 @@ namespace CTF_int {

};

/// pair of integers, used for the two edge lengths of a 2D grid
using ipair = std::pair<int,int>;

/**
 * \brief decomposition of a 2D rank grid into a grid of nodes and the grid
 *        of ranks inside a single node
 */
struct CommGrid {
  /// creates an empty grid description (no ranks, all edge lengths zero)
  CommGrid() = default;
  /**
   * \brief computes the node grid and the intra-node grid
   * \param[in] _rGrid edge lengths of the 2D rank grid
   * \param[in] _nNodes number of nodes the ranks are spread over
   */
  CommGrid(ipair _rGrid, int _nNodes);

  int nRanks = 0; // number of ranks: product of the rank grid edges
  std::vector<ipair> colorKey; // one entry per rank
  ipair rGrid; // RankGrid: given by the user
  ipair nGrid; // NodeGrid: output, grid of nodes
  ipair iGrid; // intraNodeGrid: the ranks of one node possess this grid

  /// returns the edge lengths of the node grid for nNodes nodes on rGrid
  ipair getNodeGrid(int nNodes, ipair rGrid);
  /// returns the prime factors of number
  std::vector<int> factorize(int number);
  /// splits factors into two products, selected by the bits of id
  ipair getSquare(int id, std::vector<int> factors);
};


int alloc_ptr(int64_t len, void ** const ptr);
int mst_alloc_ptr(int64_t len, void ** const ptr);
void * alloc(int64_t len);
Expand Down

0 comments on commit 59c7ffb

Please sign in to comment.