Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove 1 feature #155

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions src/contraction/ctr_tsr.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ namespace CTF_int {

double seq_tsr_ctr::est_time_fp(int nlyr){
//return COST_MEMBW*(size_A+size_B+size_C)+COST_FLOP*flops;
double ps[] = {1.0, (double)est_membw(), est_fp()};
double ps[] = {(double)est_membw(), est_fp()};
// printf("time estimate is %lf\n", seq_tsr_ctr_mdl.est_time(ps));
if (is_custom && !is_inner){
return seq_tsr_ctr_mdl_cst.est_time(ps);
Expand Down Expand Up @@ -468,11 +468,11 @@ namespace CTF_int {
// Check if we need to execute this function for the sake of training
bool sr;
if (is_custom && !is_inner){
double tps[] = {0, 1.0, (double)est_membw(), est_fp()};
double tps[] = {0, (double)est_membw(), est_fp()};
sr = seq_tsr_ctr_mdl_cst.should_observe(tps);
} else if (is_inner){
ASSERT(is_custom || func == NULL);
double tps[] = {0.0, 1.0, (double)est_membw(), est_fp()};
double tps[] = {0.0, (double)est_membw(), est_fp()};
if (is_custom){
if (inner_params.offload)
sr = seq_tsr_ctr_mdl_cst_off.should_observe(tps);
Expand All @@ -486,7 +486,7 @@ namespace CTF_int {
}

} else {
double tps[] = {0.0, 1.0, (double)est_membw(), est_fp()};
double tps[] = {0.0, (double)est_membw(), est_fp()};
sr = seq_tsr_ctr_mdl_ref.should_observe(tps);
}

Expand Down Expand Up @@ -517,11 +517,11 @@ namespace CTF_int {
idx_map_C,
func);
double exe_time = MPI_Wtime()-st_time;
double tps[] = {exe_time, 1.0, (double)est_membw(), est_fp()};
double tps[] = {exe_time, (double)est_membw(), est_fp()};
seq_tsr_ctr_mdl_cst.observe(tps);
} else if (is_inner){
ASSERT(is_custom || func == NULL);
// double ps[] = {1.0, (double)est_membw(), est_fp()};
// double ps[] = {(double)est_membw(), est_fp()};
// double est_time = seq_tsr_ctr_mdl_inr.est_time(ps);
double st_time = MPI_Wtime();
sym_seq_ctr_inr(this->alpha,
Expand All @@ -548,7 +548,7 @@ namespace CTF_int {
func);
double exe_time = MPI_Wtime()-st_time;
// printf("exe_time = %E est_time = %E abs_err = %e rel_err = %lf\n", exe_time,est_time,fabs(exe_time-est_time),fabs(exe_time-est_time)/exe_time);
double tps[] = {exe_time, 1.0, (double)est_membw(), est_fp()};
double tps[] = {exe_time, (double)est_membw(), est_fp()};
if (is_custom){
if (inner_params.offload)
seq_tsr_ctr_mdl_cst_off.observe(tps);
Expand Down Expand Up @@ -584,7 +584,7 @@ namespace CTF_int {
sym_C,
idx_map_C);
double exe_time = MPI_Wtime()-st_time;
double tps[] = {exe_time, 1.0, (double)est_membw(), est_fp()};
double tps[] = {exe_time, (double)est_membw(), est_fp()};
seq_tsr_ctr_mdl_ref.observe(tps);
}
}
Expand Down
54 changes: 27 additions & 27 deletions src/contraction/spctr_tsr.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -234,28 +234,28 @@ namespace CTF_int {
return size_A+size_B+size_C;
}

// Sparse-contraction kernel cost models, one LinModel per kernel variant; each is
// constructed from its matching *_init symbol and a string equal to its own name.
// This listing is a rendered diff: the first group below (LinModel<3>) appears to be
// the removed side of the hunk and the second group (LinModel<2>) the added side.
// Only one of the two groups can exist in the real source file.
LinModel<3> seq_tsr_spctr_cst_off_k0(seq_tsr_spctr_cst_off_k0_init,"seq_tsr_spctr_cst_off_k0");
LinModel<3> seq_tsr_spctr_cst_off_k1(seq_tsr_spctr_cst_off_k1_init,"seq_tsr_spctr_cst_off_k1");
LinModel<3> seq_tsr_spctr_cst_off_k2(seq_tsr_spctr_cst_off_k2_init,"seq_tsr_spctr_cst_off_k2");
LinModel<3> seq_tsr_spctr_off_k0(seq_tsr_spctr_off_k0_init,"seq_tsr_spctr_off_k0");
LinModel<3> seq_tsr_spctr_off_k1(seq_tsr_spctr_off_k1_init,"seq_tsr_spctr_off_k1");
LinModel<3> seq_tsr_spctr_off_k2(seq_tsr_spctr_off_k2_init,"seq_tsr_spctr_off_k2");
LinModel<3> seq_tsr_spctr_cst_k0(seq_tsr_spctr_cst_k0_init,"seq_tsr_spctr_cst_k0");
LinModel<3> seq_tsr_spctr_cst_k1(seq_tsr_spctr_cst_k1_init,"seq_tsr_spctr_cst_k1");
LinModel<3> seq_tsr_spctr_cst_k2(seq_tsr_spctr_cst_k2_init,"seq_tsr_spctr_cst_k2");
LinModel<3> seq_tsr_spctr_cst_k3(seq_tsr_spctr_cst_k3_init,"seq_tsr_spctr_cst_k3");
LinModel<3> seq_tsr_spctr_cst_k4(seq_tsr_spctr_cst_k4_init,"seq_tsr_spctr_cst_k4");
LinModel<3> seq_tsr_spctr_cst_k5(seq_tsr_spctr_cst_k5_init,"seq_tsr_spctr_cst_k5");
LinModel<3> seq_tsr_spctr_k0(seq_tsr_spctr_k0_init,"seq_tsr_spctr_k0");
LinModel<3> seq_tsr_spctr_k1(seq_tsr_spctr_k1_init,"seq_tsr_spctr_k1");
LinModel<3> seq_tsr_spctr_k2(seq_tsr_spctr_k2_init,"seq_tsr_spctr_k2");
LinModel<3> seq_tsr_spctr_k3(seq_tsr_spctr_k3_init,"seq_tsr_spctr_k3");
LinModel<3> seq_tsr_spctr_k4(seq_tsr_spctr_k4_init,"seq_tsr_spctr_k4");
LinModel<3> seq_tsr_spctr_k5(seq_tsr_spctr_k5_init,"seq_tsr_spctr_k5");
// Same eighteen models with the template argument lowered from 3 to 2, matching the
// feature arrays in this diff that no longer carry the leading constant 1.0 entry.
LinModel<2> seq_tsr_spctr_cst_off_k0(seq_tsr_spctr_cst_off_k0_init,"seq_tsr_spctr_cst_off_k0");
LinModel<2> seq_tsr_spctr_cst_off_k1(seq_tsr_spctr_cst_off_k1_init,"seq_tsr_spctr_cst_off_k1");
LinModel<2> seq_tsr_spctr_cst_off_k2(seq_tsr_spctr_cst_off_k2_init,"seq_tsr_spctr_cst_off_k2");
LinModel<2> seq_tsr_spctr_off_k0(seq_tsr_spctr_off_k0_init,"seq_tsr_spctr_off_k0");
LinModel<2> seq_tsr_spctr_off_k1(seq_tsr_spctr_off_k1_init,"seq_tsr_spctr_off_k1");
LinModel<2> seq_tsr_spctr_off_k2(seq_tsr_spctr_off_k2_init,"seq_tsr_spctr_off_k2");
LinModel<2> seq_tsr_spctr_cst_k0(seq_tsr_spctr_cst_k0_init,"seq_tsr_spctr_cst_k0");
LinModel<2> seq_tsr_spctr_cst_k1(seq_tsr_spctr_cst_k1_init,"seq_tsr_spctr_cst_k1");
LinModel<2> seq_tsr_spctr_cst_k2(seq_tsr_spctr_cst_k2_init,"seq_tsr_spctr_cst_k2");
LinModel<2> seq_tsr_spctr_cst_k3(seq_tsr_spctr_cst_k3_init,"seq_tsr_spctr_cst_k3");
LinModel<2> seq_tsr_spctr_cst_k4(seq_tsr_spctr_cst_k4_init,"seq_tsr_spctr_cst_k4");
LinModel<2> seq_tsr_spctr_cst_k5(seq_tsr_spctr_cst_k5_init,"seq_tsr_spctr_cst_k5");
LinModel<2> seq_tsr_spctr_k0(seq_tsr_spctr_k0_init,"seq_tsr_spctr_k0");
LinModel<2> seq_tsr_spctr_k1(seq_tsr_spctr_k1_init,"seq_tsr_spctr_k1");
LinModel<2> seq_tsr_spctr_k2(seq_tsr_spctr_k2_init,"seq_tsr_spctr_k2");
LinModel<2> seq_tsr_spctr_k3(seq_tsr_spctr_k3_init,"seq_tsr_spctr_k3");
LinModel<2> seq_tsr_spctr_k4(seq_tsr_spctr_k4_init,"seq_tsr_spctr_k4");
LinModel<2> seq_tsr_spctr_k5(seq_tsr_spctr_k5_init,"seq_tsr_spctr_k5");

double seq_tsr_spctr::est_time_fp(int nlyr, int nblk_A, int nblk_B, int nblk_C, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C){
// return COST_MEMBW*(size_A+size_B+size_C)+COST_FLOP*flops;
double ps[] = {1.0, (double)est_membw(nnz_frac_A, nnz_frac_B, nnz_frac_C), est_fp(nnz_frac_A, nnz_frac_B, nnz_frac_C)};
double ps[] = {(double)est_membw(nnz_frac_A, nnz_frac_B, nnz_frac_C), est_fp(nnz_frac_A, nnz_frac_B, nnz_frac_C)};
switch (krnl_type){
case 0:
if (is_custom){
Expand Down Expand Up @@ -378,7 +378,7 @@ namespace CTF_int {

}

// Feature array used for the should-observe check that follows; slot 0 is 0.0 here
// because no execution time has been measured yet. The two declarations are the
// before/after sides of the diff (the second drops the constant 1.0 entry).
// NOTE(review): est_fp() is passed nnz_frac_B as its FIRST argument, while est_membw()
// on the same line and the est_fp() call in seq_tsr_spctr::est_time_fp pass nnz_frac_A
// first. Looks like a typo for nnz_frac_A; confirm before relying on this feature.
double tps_[] = {0.0, 1.0, (double)est_membw(nnz_frac_A, nnz_frac_B, nnz_frac_C), est_fp(nnz_frac_B, nnz_frac_B, nnz_frac_C)};
double tps_[] = {0.0, (double)est_membw(nnz_frac_A, nnz_frac_B, nnz_frac_C), est_fp(nnz_frac_B, nnz_frac_B, nnz_frac_C)};
// Check if we need to execute this function for the sake of training
bool bsr = true;
switch (krnl_type){
Expand Down Expand Up @@ -550,7 +550,7 @@ namespace CTF_int {
}

double exe_time = MPI_Wtime() - st_time;
// Observation record: measured exe_time in slot 0, followed by the model features.
// The two declarations are the before/after sides of the diff (the second drops the
// constant 1.0 entry).
// NOTE(review): est_fp() is passed nnz_frac_B as its FIRST argument, while est_membw()
// on the same line and the est_fp() call in seq_tsr_spctr::est_time_fp pass nnz_frac_A
// first. Looks like a typo for nnz_frac_A; if so, the recorded feature would differ
// from the one used when estimating. Confirm.
double tps[] = {exe_time, 1.0, (double)est_membw(nnz_frac_A, nnz_frac_B, nnz_frac_C), est_fp(nnz_frac_B, nnz_frac_B, nnz_frac_C)};
double tps[] = {exe_time, (double)est_membw(nnz_frac_A, nnz_frac_B, nnz_frac_C), est_fp(nnz_frac_B, nnz_frac_B, nnz_frac_C)};
switch (krnl_type){
case 0:
if (is_custom){
Expand Down Expand Up @@ -995,22 +995,22 @@ namespace CTF_int {
}
}

LinModel<2> pin_keys_mdl(pin_keys_mdl_init,"pin_keys_mdl");
LinModel<1> pin_keys_mdl(pin_keys_mdl_init,"pin_keys_mdl");
double spctr_pin_keys::est_time_fp(int nlyr, int nblk_A, int nblk_B, int nblk_C, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C) {
switch (AxBxC){
case 0:
{
double ps[] = {1.0, dns_blk_sz*nnz_frac_A};
double ps[] = {dns_blk_sz*nnz_frac_A};
return pin_keys_mdl.est_time(ps);
}
case 1:
{
double ps[] = {1.0, dns_blk_sz*nnz_frac_B};
double ps[] = {dns_blk_sz*nnz_frac_B};
return pin_keys_mdl.est_time(ps);
}
case 2:
{
double ps[] = {1.0, dns_blk_sz*nnz_frac_C};
double ps[] = {dns_blk_sz*nnz_frac_C};
return 2.*pin_keys_mdl.est_time(ps);
}
}
Expand Down Expand Up @@ -1084,7 +1084,7 @@ namespace CTF_int {
pi.pin(nnz, order, lens, divisor, pi_new);

double exe_time = MPI_Wtime()-st_time;
double tps[] = {exe_time, 1.0, (double)nnz};
double tps[] = {exe_time, (double)nnz};
pin_keys_mdl.observe(tps);

TAU_FSTOP(spctr_pin_keys);
Expand All @@ -1109,7 +1109,7 @@ namespace CTF_int {
}
depin(sr_C, order, lens, divisor, nblk_C, virt_dim, phys_rank, new_C, new_nnz_C, size_blk_C, new_C, true);
double exe_time = MPI_Wtime()-st_time;
double tps[] = {exe_time, 1.0, (double)nnz};
double tps[] = {exe_time, (double)nnz};
pin_keys_mdl.observe(tps);
break;
}
Expand Down
40 changes: 20 additions & 20 deletions src/interface/common.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -47,19 +47,19 @@ namespace CTF_int {


// Cost models for the collective-communication wrappers in this file. Each model is
// constructed from a *_init symbol and a string equal to its own name.
// This listing is a rendered diff: within each pair of groups the LinModel<3> lines
// appear to be the removed side and the LinModel<2> lines the added side.
//static double init_mdl[] = {COST_LATENCY, COST_LATENCY, COST_NETWBW};
LinModel<3> alltoall_mdl(alltoall_mdl_init,"alltoall_mdl");
LinModel<3> alltoallv_mdl(alltoallv_mdl_init,"alltoallv_mdl");
LinModel<2> alltoall_mdl(alltoall_mdl_init,"alltoall_mdl");
LinModel<2> alltoallv_mdl(alltoallv_mdl_init,"alltoallv_mdl");

// Both preprocessor branches contain only a commented-out initializer, so the
// #ifdef BGQ switch currently has no effect on the compiled code shown here.
#ifdef BGQ
//static double init_lg_mdl[] = {COST_LATENCY, COST_LATENCY, 0.0, COST_NETWBW + 2.0*COST_MEMBW};
#else
//static double init_lg_mdl[] = {COST_LATENCY, COST_LATENCY, COST_NETWBW + 2.0*COST_MEMBW, 0.0};
#endif
// Reduce / allreduce / broadcast models; the *_cst variants are the ones selected when
// the MPI_Op falls outside the [MPI_MAX, MPI_REPLACE] range (see the estimate_* functions).
LinModel<3> red_mdl(red_mdl_init,"red_mdl");
LinModel<3> red_mdl_cst(red_mdl_cst_init,"red_mdl_cst");
LinModel<3> allred_mdl(allred_mdl_init,"allred_mdl");
LinModel<3> allred_mdl_cst(allred_mdl_cst_init,"allred_mdl_cst");
LinModel<3> bcast_mdl(bcast_mdl_init,"bcast_mdl");
LinModel<2> red_mdl(red_mdl_init,"red_mdl");
LinModel<2> red_mdl_cst(red_mdl_cst_init,"red_mdl_cst");
LinModel<2> allred_mdl(allred_mdl_init,"allred_mdl");
LinModel<2> allred_mdl_cst(allred_mdl_cst_init,"allred_mdl_cst");
LinModel<2> bcast_mdl(bcast_mdl_init,"bcast_mdl");


template <typename type>
Expand Down Expand Up @@ -339,20 +339,20 @@ namespace CTF_int {
}

// Returns bcast_mdl's time estimate for a broadcast of msg_sz over this communicator.
// Features passed to the model: log2(np) and msg_sz.
double CommData::estimate_bcast_time(int64_t msg_sz){
// The two declarations of ps are the before/after sides of the diff; the second one
// omits the leading constant 1.0 feature.
double ps[] = {1.0, log2((double)np), (double)msg_sz};
double ps[] = {log2((double)np), (double)msg_sz};
return bcast_mdl.est_time(ps);
}

// Returns a time estimate for an allreduce of msg_sz with reduction operator op.
// Features passed to the model: log2(np) and msg_sz*log2(np).
double CommData::estimate_allred_time(int64_t msg_sz, MPI_Op op){
// The two declarations of ps are the before/after sides of the diff; the second one
// omits the leading constant 1.0 feature.
double ps[] = {1.0, log2((double)np), (double)msg_sz*log2((double)(np))};
double ps[] = {log2((double)np), (double)msg_sz*log2((double)(np))};
// Ops whose handle lies in [MPI_MAX, MPI_REPLACE] use allred_mdl; every other op uses
// allred_mdl_cst. Presumably this range separates predefined from user-defined ops;
// that relies on how the MPI implementation orders MPI_Op handles, TODO confirm.
if (op >= MPI_MAX && op <= MPI_REPLACE)
return allred_mdl.est_time(ps);
else
return allred_mdl_cst.est_time(ps);
}

double CommData::estimate_red_time(int64_t msg_sz, MPI_Op op){
double ps[] = {1.0, log2((double)np), (double)msg_sz*log2((double)(np))};
double ps[] = {log2((double)np), (double)msg_sz*log2((double)(np))};
if (op >= MPI_MAX && op <= MPI_REPLACE)
return red_mdl.est_time(ps);
else
Expand All @@ -369,12 +369,12 @@ namespace CTF_int {


// Returns alltoall_mdl's time estimate for an all-to-all with per-chunk size chunk_sz.
// Features passed to the model: log2(np) and log2(np)*np*chunk_sz.
double CommData::estimate_alltoall_time(int64_t chunk_sz) {
// The two declarations of ps are the before/after sides of the diff; the second one
// omits the leading constant 1.0 feature.
double ps[] = {1.0, log2((double)np), log2((double)np)*np*chunk_sz};
double ps[] = {log2((double)np), log2((double)np)*np*chunk_sz};
return alltoall_mdl.est_time(ps);
}

// Returns alltoallv_mdl's time estimate for an all-to-all-v moving tot_sz in total.
// Features passed to the model: log2(np) and log2(np)*tot_sz.
// NOTE(review): the sites in this file that feed alltoallv_mdl.should_observe()/observe()
// pass (double)tot_sz as the last feature WITHOUT the log2(np) factor used here.
// Confirm which scaling the model is meant to be trained and queried with.
double CommData::estimate_alltoallv_time(int64_t tot_sz) {
// The two declarations of ps are the before/after sides of the diff; the second one
// omits the leading constant 1.0 feature.
double ps[] = {1.0, log2((double)np), log2((double)np)*tot_sz};
double ps[] = {log2((double)np), log2((double)np)*tot_sz};
return alltoallv_mdl.est_time(ps);
}

Expand All @@ -385,7 +385,7 @@ namespace CTF_int {

int tsize_;
MPI_Type_size(mdtype, &tsize_);
double tps_[] = {0.0, 1.0, log2(np), ((double)count)*tsize_};
double tps_[] = {0.0, log2(np), ((double)count)*tsize_};
if (!bcast_mdl.should_observe(tps_)) return;
#endif

Expand All @@ -398,7 +398,7 @@ namespace CTF_int {
double exe_time = MPI_Wtime()-st_time;
int tsize;
MPI_Type_size(mdtype, &tsize);
double tps[] = {exe_time, 1.0, log2(np), ((double)count)*tsize};
double tps[] = {exe_time, log2(np), ((double)count)*tsize};
bcast_mdl.observe(tps);
#endif
}
Expand All @@ -411,7 +411,7 @@ namespace CTF_int {
#ifdef TUNE
int tsize_;
MPI_Type_size(mdtype, &tsize_);
double tps_[] = {0.0, 1.0, log2(np), ((double)count)*tsize_*std::max(.5,(double)log2(np))};
double tps_[] = {0.0, log2(np), ((double)count)*tsize_*std::max(.5,(double)log2(np))};
bool bsr = true;
if (op >= MPI_MAX && op <= MPI_REPLACE)
bsr = allred_mdl.should_observe(tps_);
Expand All @@ -428,7 +428,7 @@ namespace CTF_int {
double exe_time = MPI_Wtime()-st_time;
int tsize;
MPI_Type_size(mdtype, &tsize);
double tps[] = {exe_time, 1.0, log2(np), ((double)count)*tsize*std::max(.5,(double)log2(np))};
double tps[] = {exe_time, log2(np), ((double)count)*tsize*std::max(.5,(double)log2(np))};
if (op >= MPI_MAX && op <= MPI_REPLACE)
allred_mdl.observe(tps);
else
Expand All @@ -442,7 +442,7 @@ namespace CTF_int {
// change-of-observe
int tsize_;
MPI_Type_size(mdtype, &tsize_);
double tps_[] = {0.0, 1.0, log2(np), ((double)count)*tsize_*std::max(.5,(double)log2(np))};
double tps_[] = {0.0, log2(np), ((double)count)*tsize_*std::max(.5,(double)log2(np))};
bool bsr = true;
if (op >= MPI_MAX && op <= MPI_REPLACE)
bsr = red_mdl.should_observe(tps_);
Expand All @@ -459,7 +459,7 @@ namespace CTF_int {
double exe_time = MPI_Wtime()-st_time;
int tsize;
MPI_Type_size(mdtype, &tsize);
double tps[] = {exe_time, 1.0, log2(np), ((double)count)*tsize*std::max(.5,(double)log2(np))};
double tps[] = {exe_time, log2(np), ((double)count)*tsize*std::max(.5,(double)log2(np))};
if (op >= MPI_MAX && op <= MPI_REPLACE)
red_mdl.observe(tps);
else
Expand All @@ -479,7 +479,7 @@ namespace CTF_int {
MPI_Barrier(cm);
// change-of-observe
int64_t tot_sz_ = std::max(send_displs[np-1]+send_counts[np-1], recv_displs[np-1]+recv_counts[np-1])*datum_size;
double tps_[] = {0.0, 1.0, log2(np), (double)tot_sz_};
double tps_[] = {0.0, log2(np), (double)tot_sz_};
if (!alltoallv_mdl.should_observe(tps_)) return;
#endif

Expand Down Expand Up @@ -572,7 +572,7 @@ namespace CTF_int {
#endif
double exe_time = MPI_Wtime()-st_time;
int64_t tot_sz = std::max(send_displs[np-1]+send_counts[np-1], recv_displs[np-1]+recv_counts[np-1])*datum_size;
double tps[] = {exe_time, 1.0, log2(np), (double)tot_sz};
double tps[] = {exe_time, log2(np), (double)tot_sz};
alltoallv_mdl.observe(tps);
}

Expand Down
4 changes: 2 additions & 2 deletions src/redistribution/dgtog_redist.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
#include "dgtog_bucket.h"
namespace CTF_int {
//static double init_mdl[] = {COST_LATENCY, COST_LATENCY, COST_NETWBW};
// Cost model for the dgtog redistribution. This listing is a rendered diff: the
// LinModel<3> line appears to be the removed side and the LinModel<2> line the added side.
LinModel<3> dgtog_res_mdl(dgtog_res_mdl_init,"dgtog_res_mdl");
LinModel<2> dgtog_res_mdl(dgtog_res_mdl_init,"dgtog_res_mdl");

// Returns dgtog_res_mdl's time estimate for redistributing tot_sz over np processes.
// Features passed to the model: log2(np) and tot_sz*log2(np).
double dgtog_est_time(int64_t tot_sz, int np){
// The two declarations of ps are the before/after sides of the diff; the second one
// omits the leading constant 1.0 feature.
double ps[] = {1.0, (double)log2(np), (double)tot_sz*log2(np)};
double ps[] = {(double)log2(np), (double)tot_sz*log2(np)};
return dgtog_res_mdl.est_time(ps);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/redistribution/dgtog_redist_ror.h
Original file line number Diff line number Diff line change
Expand Up @@ -712,7 +712,7 @@ void dgtog_reshuffle(int const * sym,
MPI_Barrier(ord_glb_comm.cm);
#endif
double exe_time = MPI_Wtime()-st_time;
double tps[] = {exe_time, 1.0, (double)log2(ord_glb_comm.np), (double)std::max(old_dist.size, new_dist.size)*log2(ord_glb_comm.np)*sr->el_size};
double tps[] = {exe_time, (double)log2(ord_glb_comm.np), (double)std::max(old_dist.size, new_dist.size)*log2(ord_glb_comm.np)*sr->el_size};

// double-check
dgtog_res_mdl.observe(tps);
Expand Down
10 changes: 5 additions & 5 deletions src/redistribution/nosym_transp.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
namespace CTF_int {

//static double init_ct_ps[] = {COST_LATENCY, 1.5*COST_MEMBW};
// Cost models for the non-symmetric transpose, one each for the long-contiguous,
// short-contiguous and non-contiguous cases. This listing is a rendered diff: the
// LinModel<2> group appears to be the removed side and the LinModel<1> group the added
// side, matching feature arrays that shrink from {1.0, tot_sz} to {tot_sz}.
LinModel<2> long_contig_transp_mdl(long_contig_transp_mdl_init,"long_contig_transp_mdl");
LinModel<2> shrt_contig_transp_mdl(shrt_contig_transp_mdl_init,"shrt_contig_transp_mdl");
LinModel<2> non_contig_transp_mdl(non_contig_transp_mdl_init,"non_contig_transp_mdl");
LinModel<1> long_contig_transp_mdl(long_contig_transp_mdl_init,"long_contig_transp_mdl");
LinModel<1> shrt_contig_transp_mdl(shrt_contig_transp_mdl_init,"shrt_contig_transp_mdl");
LinModel<1> non_contig_transp_mdl(non_contig_transp_mdl_init,"non_contig_transp_mdl");


//#define OPT_NOSYM_TR
Expand Down Expand Up @@ -445,7 +445,7 @@ namespace CTF_int {
}
tot_sz *= nvirt_A;

double tps[] = {0.0, 1.0, (double)tot_sz};
double tps[] = {0.0, (double)tot_sz};
bool should_run = true;
if (contig0 < 4){
should_run = non_contig_transp_mdl.should_observe(tps);
Expand Down Expand Up @@ -507,7 +507,7 @@ namespace CTF_int {
tot_sz *= nvirt_A;

double exe_time = MPI_Wtime() - st_time;
double tps[] = {exe_time, 1.0, (double)tot_sz};
double tps[] = {exe_time, (double)tot_sz};
if (contig0 < 4){
non_contig_transp_mdl.observe(tps);
} else if (contig0 <= 64){
Expand Down
Loading
Loading