Skip to content

Commit

Permalink
tttp fully integrated
Browse files Browse the repository at this point in the history
  • Loading branch information
raghavendrak committed Jul 9, 2024
1 parent bfbbb88 commit 81996d2
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 58 deletions.
11 changes: 5 additions & 6 deletions examples/spttn_tucker_solve_kernels.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ bool execute_spttn_kernel(int n, int ur, int vr, int wr,
term id 0: 4 8 16 32
term id 1: 4 2 8 16
term id 2: 4 2 1 8
term id 3: 4 1 2
term id 3: 4 2 1
niloops: 6
i: 1 j: 2 k: 4 a: 8 b: 16 c: 32
Expand All @@ -424,10 +424,8 @@ bool execute_spttn_kernel(int n, int ur, int vr, int wr,
buf2[a] += buf[a,b,c] * V[b,j]
for i:
for a:
buf[i] += buf2[a] * U[a,i]
for i:
for j:
Z_ijk += buf[i] * T[i,j,k]
buf += buf2[a] * U[a,i]
Z_ijk += buf * T[i,j,k]
*/

Expand Down Expand Up @@ -459,7 +457,8 @@ bool execute_spttn_kernel(int n, int ur, int vr, int wr,
if (dw.rank == 0) printf("ijk,ai,bj,ck,abc->ijk using SpTTN-Cyclops (NOTE that it includes CSF construction time; please see total time to calculate printed above): %1.2lf\n", (etime - stime));

stime = MPI_Wtime();
UCxx["ijk"] = T["ijk"] * U["ai"] * V["bj"] * W["ck"] * C["abc"];
UCxx["ijk"] = T["ijk"];
UCxx["ijk"] += T["ijk"] * U["ai"] * V["bj"] * W["ck"] * C["abc"];
etime = MPI_Wtime();
if (dw.rank == 0) printf("ijk,ai,bj,ck,abc->ijk using CTF: %1.2lf\n", (etime - stime));

Expand Down
1 change: 1 addition & 0 deletions src/spttn_cyclops/csf.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ namespace CTF_int {
ptr[j][it[j]] = it[j-1];
}
dt = pairs;
dt_sp_op = nullptr;
}

int64_t get_child_ptr(int level,
Expand Down
19 changes: 19 additions & 0 deletions src/spttn_cyclops/execute_kernel.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ namespace CTF_int {
// IASSERT(termx.blas_kernel == RECURSIVE_LOOP);
double * dX = (double *)Bs[termx.X];
double * dY = (double *)Bs[termx.Y];
// assert failure if the buffer or the output is not dense
// IASSERT(dY != nullptr);
if (termx.ALPHA == -1) {
// IASSERT(termx.Y != (nBs-1));
double alpha = A_tree->dt[tree_pt_st].d;
Expand Down Expand Up @@ -157,6 +159,10 @@ namespace CTF_int {
{
int iidx = level;
int idx = term.index_order[iidx];
if (idx == -1) {
// scalar contraction
idx = term.index_order[iidx-1];
}
switch (term.break_rec_idx[idx]) {
case SPARSE_xAXPY: {
double * dY = (double *)Bs[term.Y];
Expand Down Expand Up @@ -284,6 +290,19 @@ namespace CTF_int {
}
}
break;
case SCALAR: {
double * dX = (double *)Bs[term.X];
double * dY = (double *)Bs[term.Y];
if (term.ALPHA == -1 && dY == nullptr) {
double alpha = A_tree->dt[tree_pt_st].d;
A_tree->dt_sp_op[tree_pt_st].d += alpha * *dX;
}
else {
// handle the cases where either the output or the buffer is sparse
IASSERT(0);
}
}
break;
default: {
IASSERT(0);
}
Expand Down
42 changes: 1 addition & 41 deletions src/spttn_cyclops/execute_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ namespace CTF_int{
blas_idx = -1;
tbuffer_sz = -1;
break_rec_idx = (int *)CTF_int::alloc(sizeof(int) * (num_indices+1));
blas_kernel = 0; // NOT_SET

index_order = (int *)CTF_int::alloc(sizeof(int) * (num_indices+1));
std::fill_n(index_order, (num_indices+1), -1);
Expand Down Expand Up @@ -192,46 +193,5 @@ namespace CTF_int{
int nterms,
int num_indices,
int64_t ** lda_Bs);
#ifdef YET_TO_COMPILE
#ifdef OLD_CODE

/*
void gen_inv_idx(int order_A,
int const * idx_A,
int nBs,
int * order_Bs,
const int * const * idx_Bs,
int * order_tot,
int *** idx_arr);
*/

/*
* this function can be invoked from gen_contraction::execute()
* definition visible to the compiler
*/
/*
template <typename dtype>
void traverse_CSF(CSF<dtype> * A_tree) {}
*/

void dnBs_loop(char const * alpha,
int nBs,
char ** Bs,
const algstrct * const * sr_Bs,
const int64_t * const * lda_Bs,
bivar_function const * func,
const int * const * rev_idx_map,
double dt_AB,
std::vector<std::pair<int, int64_t> > nidx_Bs,
std::vector<int> tidx_Bs);


void optimize_contraction_order(std::vector<std::pair<int, int64_t> > & nidx_Bs,
int nBs,
const int * const * rev_idx_map,
std::vector<int> & tidx_Bs);

#endif
#endif
}
#endif
6 changes: 2 additions & 4 deletions src/spttn_cyclops/prepare_kernel.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -428,16 +428,14 @@ namespace CTF_int {
for (int i = 0; i < nterms; i++) {
contraction_terms<dtype> & term = terms[i];
std::fill_n(term.break_rec_idx, num_indices+1, RECURSIVE_LOOP);
int idx = term.index_order[term.index_order_sz-1];
if (term.blas_kernel == SCALAR) {
int idx = term.index_order[term.index_order_sz-1];
term.break_rec_idx[idx] = SCALAR;
}
else if (term.blas_kernel == RECURSIVE_LOOP) {
int idx = term.index_order[term.index_order_sz-1];
term.break_rec_idx[idx] = RECURSIVE_LOOP;
}
else {
IASSERT(term.blas_kernel != SCALAR);
IASSERT(term.blas_kernel != -1);
term.break_rec_idx[term.blas_idx] = term.blas_kernel;
}
Expand Down Expand Up @@ -510,7 +508,7 @@ namespace CTF_int {
if (A->wrld->rank == 0) printf("output redistribution total time: %1.2lf\n", (etime - stime));
}
else {
IASSERT(0);
// no need to redistribute output
}
if (A->wrld->rank == 0) printf("total time to calculate: %1.2lf\n", (tot_time));

Expand Down
19 changes: 12 additions & 7 deletions src/spttn_cyclops/prepare_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,13 +187,18 @@ namespace CTF_int {
std::cout << "prepare blas kernels: term_id: " << i << std::endl;
contraction_terms<dtype> & term = terms[i];
switch(term.blas_kernel) {
case RECURSIVE_LOOP: {
// use all the cases of SCALAR; need to have blas_kernel set to RECURSIVE_LOOP to handle when executing the contraction
if (rank == 0) std::cout << "term_id: " << i << " blas_kernel: " << "RECURSIVE_LOOP" << std::endl;
case NOT_SET: {
// use all the cases of RECURSIVE_LOOP
term.blas_kernel = RECURSIVE_LOOP;
if (rank == 0) std::cout << "term_id: " << i << " blas_kernel: " << "NOT_SET" << std::endl;
}
case SCALAR: {
// TODO: i==0 is dependent on contracting the tree first; do away with this dependency by just checking if the main sparse tensor is in the term
// use all the cases of SCALAR; need to have blas_kernel set to RECURSIVE_LOOP to handle when executing the contraction
if (rank == 0) std::cout << "term_id: " << i << " blas_kernel: " << "SCALAR" << std::endl;
}
case RECURSIVE_LOOP: {
// TODO: i==0 is dependent on contracting the tree first; do away with this dependency by just checking if the main sparse tensor is in the term
if (rank == 0) std::cout << "term_id: " << i << " blas_kernel: " << "RECURSIVE_LOOP" << std::endl;
if (i == 0) {
// two dense factors are contracted in the first term
if (term.Bs_in_term[nBs] == false) {
Expand Down Expand Up @@ -650,8 +655,8 @@ namespace CTF_int {
}
}
else {
// TODO:
IASSERT(0);
// TODO: tucker_solve TTTP term 1 a <- abc bj
term.blas_kernel = RECURSIVE_LOOP;
}
}
else if (num_idx == 3) {
Expand Down Expand Up @@ -915,7 +920,7 @@ namespace CTF_int {
}
}
if (nidx_term[2] == 0) {
IASSERT(0);
std::cout << "term_id: " << i << " nidx_term[0]: " << nidx_term[0] << " nidx_term[1]: " << nidx_term[1] << " nidx_term[2]: " << nidx_term[2] << " inner_idx: " << terms[i].inner_idx << " reset_idx: " << terms[i].reset_idx << std::endl;
IASSERT(terms[i].inner_idx != -1);
/*
for i:
Expand Down

0 comments on commit 81996d2

Please sign in to comment.