diff --git a/configure b/configure index 95da8250..50ae86b0 100755 --- a/configure +++ b/configure @@ -1297,6 +1297,7 @@ $SPMKL_COMMENT #then recompile with parameters without -DTUNE #Note: -DTUNE requires lapack (include -mkl or -llapack in LIBS) and the inclusion of above performance profiling flags #DEFS += -DTUNE +#NOTE: The environment variables MODEL_TYPE and MAX_NUM_OBSERVATIONS may also be set at run time. The only supported MODEL_TYPE is Global (the default when unset); MAX_NUM_OBSERVATIONS defaults to 1048576 when unset. ### Optional: DEBUGGING AND VERBOSITY #uncomment below to enable CTF execution output (1 for basic contraction information on start-up and contractions) diff --git a/src/contraction/ctr_tsr.cxx b/src/contraction/ctr_tsr.cxx index 45cc994a..f6ddab08 100755 --- a/src/contraction/ctr_tsr.cxx +++ b/src/contraction/ctr_tsr.cxx @@ -389,12 +389,12 @@ namespace CTF_int { int64_t seq_tsr_ctr::mem_fp(){ return 0; } //double seq_tsr_ctr_mig[] = {1e-6, 9.30e-11, 5.61e-10}; - LinModel<3> seq_tsr_ctr_mdl_cst(seq_tsr_ctr_mdl_cst_init,"seq_tsr_ctr_mdl_cst"); - LinModel<3> seq_tsr_ctr_mdl_ref(seq_tsr_ctr_mdl_ref_init,"seq_tsr_ctr_mdl_ref"); - LinModel<3> seq_tsr_ctr_mdl_inr(seq_tsr_ctr_mdl_inr_init,"seq_tsr_ctr_mdl_inr"); - LinModel<3> seq_tsr_ctr_mdl_off(seq_tsr_ctr_mdl_off_init,"seq_tsr_ctr_mdl_off"); - LinModel<3> seq_tsr_ctr_mdl_cst_inr(seq_tsr_ctr_mdl_cst_inr_init,"seq_tsr_ctr_mdl_cst_inr"); - LinModel<3> seq_tsr_ctr_mdl_cst_off(seq_tsr_ctr_mdl_cst_off_init,"seq_tsr_ctr_mdl_cst_off"); + Model* seq_tsr_ctr_mdl_cst = select_model<3>(seq_tsr_ctr_mdl_cst_init,"seq_tsr_ctr_mdl_cst"); + Model* seq_tsr_ctr_mdl_ref = select_model<3>(seq_tsr_ctr_mdl_ref_init,"seq_tsr_ctr_mdl_ref"); + Model* seq_tsr_ctr_mdl_inr = select_model<3>(seq_tsr_ctr_mdl_inr_init,"seq_tsr_ctr_mdl_inr"); + Model* seq_tsr_ctr_mdl_off = select_model<3>(seq_tsr_ctr_mdl_off_init,"seq_tsr_ctr_mdl_off"); + Model* seq_tsr_ctr_mdl_cst_inr = select_model<3>(seq_tsr_ctr_mdl_cst_inr_init,"seq_tsr_ctr_mdl_cst_inr"); + Model* seq_tsr_ctr_mdl_cst_off = 
select_model<3>(seq_tsr_ctr_mdl_cst_off_init,"seq_tsr_ctr_mdl_cst_off"); uint64_t seq_tsr_ctr::est_membw(){ uint64_t size_A = sy_packed_size(order_A, edge_len_A, sym_A)*sr_A->el_size; @@ -438,21 +438,21 @@ namespace CTF_int { double ps[] = {1.0, (double)est_membw(), est_fp()}; // printf("time estimate is %lf\n", seq_tsr_ctr_mdl.est_time(ps)); if (is_custom && !is_inner){ - return seq_tsr_ctr_mdl_cst.est_time(ps); + return seq_tsr_ctr_mdl_cst->est_time(ps); } else if (is_inner){ if (is_custom){ if (inner_params.offload) - return seq_tsr_ctr_mdl_cst_off.est_time(ps); + return seq_tsr_ctr_mdl_cst_off->est_time(ps); else - return seq_tsr_ctr_mdl_cst_inr.est_time(ps); + return seq_tsr_ctr_mdl_cst_inr->est_time(ps); } else { if (inner_params.offload) - return seq_tsr_ctr_mdl_off.est_time(ps); + return seq_tsr_ctr_mdl_off->est_time(ps); else - return seq_tsr_ctr_mdl_inr.est_time(ps); + return seq_tsr_ctr_mdl_inr->est_time(ps); } } else - return seq_tsr_ctr_mdl_ref.est_time(ps); + return seq_tsr_ctr_mdl_ref->est_time(ps); assert(0); //wont make it here return 0.0; } @@ -469,25 +469,25 @@ namespace CTF_int { bool sr; if (is_custom && !is_inner){ double tps[] = {0, 1.0, (double)est_membw(), est_fp()}; - sr = seq_tsr_ctr_mdl_cst.should_observe(tps); + sr = seq_tsr_ctr_mdl_cst->should_observe(tps); } else if (is_inner){ ASSERT(is_custom || func == NULL); double tps[] = {0.0, 1.0, (double)est_membw(), est_fp()}; if (is_custom){ if (inner_params.offload) - sr = seq_tsr_ctr_mdl_cst_off.should_observe(tps); + sr = seq_tsr_ctr_mdl_cst_off->should_observe(tps); else - sr = seq_tsr_ctr_mdl_cst_inr.should_observe(tps); + sr = seq_tsr_ctr_mdl_cst_inr->should_observe(tps); } else { if (inner_params.offload) - sr = seq_tsr_ctr_mdl_off.should_observe(tps); + sr = seq_tsr_ctr_mdl_off->should_observe(tps); else - sr = seq_tsr_ctr_mdl_inr.should_observe(tps); + sr = seq_tsr_ctr_mdl_inr->should_observe(tps); } } else { double tps[] = {0.0, 1.0, (double)est_membw(), est_fp()}; - sr = 
seq_tsr_ctr_mdl_ref.should_observe(tps); + sr = seq_tsr_ctr_mdl_ref->should_observe(tps); } if (!sr) return; @@ -518,11 +518,11 @@ namespace CTF_int { func); double exe_time = MPI_Wtime()-st_time; double tps[] = {exe_time, 1.0, (double)est_membw(), est_fp()}; - seq_tsr_ctr_mdl_cst.observe(tps); + seq_tsr_ctr_mdl_cst->observe(tps); } else if (is_inner){ ASSERT(is_custom || func == NULL); // double ps[] = {1.0, (double)est_membw(), est_fp()}; -// double est_time = seq_tsr_ctr_mdl_inr.est_time(ps); +// double est_time = seq_tsr_ctr_mdl_inr->est_time(ps); double st_time = MPI_Wtime(); sym_seq_ctr_inr(this->alpha, A, @@ -551,14 +551,14 @@ namespace CTF_int { double tps[] = {exe_time, 1.0, (double)est_membw(), est_fp()}; if (is_custom){ if (inner_params.offload) - seq_tsr_ctr_mdl_cst_off.observe(tps); + seq_tsr_ctr_mdl_cst_off->observe(tps); else - seq_tsr_ctr_mdl_cst_inr.observe(tps); + seq_tsr_ctr_mdl_cst_inr->observe(tps); } else { if (inner_params.offload) - seq_tsr_ctr_mdl_off.observe(tps); + seq_tsr_ctr_mdl_off->observe(tps); else - seq_tsr_ctr_mdl_inr.observe(tps); + seq_tsr_ctr_mdl_inr->observe(tps); } // seq_tsr_ctr_mdl_inr.print_param_guess(); } else { @@ -585,7 +585,7 @@ namespace CTF_int { idx_map_C); double exe_time = MPI_Wtime()-st_time; double tps[] = {exe_time, 1.0, (double)est_membw(), est_fp()}; - seq_tsr_ctr_mdl_ref.observe(tps); + seq_tsr_ctr_mdl_ref->observe(tps); } } diff --git a/src/contraction/spctr_tsr.cxx b/src/contraction/spctr_tsr.cxx index 078731cc..235ec1cc 100644 --- a/src/contraction/spctr_tsr.cxx +++ b/src/contraction/spctr_tsr.cxx @@ -234,24 +234,24 @@ namespace CTF_int { return size_A+size_B+size_C; } - LinModel<3> seq_tsr_spctr_cst_off_k0(seq_tsr_spctr_cst_off_k0_init,"seq_tsr_spctr_cst_off_k0"); - LinModel<3> seq_tsr_spctr_cst_off_k1(seq_tsr_spctr_cst_off_k1_init,"seq_tsr_spctr_cst_off_k1"); - LinModel<3> seq_tsr_spctr_cst_off_k2(seq_tsr_spctr_cst_off_k2_init,"seq_tsr_spctr_cst_off_k2"); - LinModel<3> 
seq_tsr_spctr_off_k0(seq_tsr_spctr_off_k0_init,"seq_tsr_spctr_off_k0"); - LinModel<3> seq_tsr_spctr_off_k1(seq_tsr_spctr_off_k1_init,"seq_tsr_spctr_off_k1"); - LinModel<3> seq_tsr_spctr_off_k2(seq_tsr_spctr_off_k2_init,"seq_tsr_spctr_off_k2"); - LinModel<3> seq_tsr_spctr_cst_k0(seq_tsr_spctr_cst_k0_init,"seq_tsr_spctr_cst_k0"); - LinModel<3> seq_tsr_spctr_cst_k1(seq_tsr_spctr_cst_k1_init,"seq_tsr_spctr_cst_k1"); - LinModel<3> seq_tsr_spctr_cst_k2(seq_tsr_spctr_cst_k2_init,"seq_tsr_spctr_cst_k2"); - LinModel<3> seq_tsr_spctr_cst_k3(seq_tsr_spctr_cst_k3_init,"seq_tsr_spctr_cst_k3"); - LinModel<3> seq_tsr_spctr_cst_k4(seq_tsr_spctr_cst_k4_init,"seq_tsr_spctr_cst_k4"); - LinModel<3> seq_tsr_spctr_cst_k5(seq_tsr_spctr_cst_k5_init,"seq_tsr_spctr_cst_k5"); - LinModel<3> seq_tsr_spctr_k0(seq_tsr_spctr_k0_init,"seq_tsr_spctr_k0"); - LinModel<3> seq_tsr_spctr_k1(seq_tsr_spctr_k1_init,"seq_tsr_spctr_k1"); - LinModel<3> seq_tsr_spctr_k2(seq_tsr_spctr_k2_init,"seq_tsr_spctr_k2"); - LinModel<3> seq_tsr_spctr_k3(seq_tsr_spctr_k3_init,"seq_tsr_spctr_k3"); - LinModel<3> seq_tsr_spctr_k4(seq_tsr_spctr_k4_init,"seq_tsr_spctr_k4"); - LinModel<3> seq_tsr_spctr_k5(seq_tsr_spctr_k5_init,"seq_tsr_spctr_k5"); + Model* seq_tsr_spctr_cst_off_k0 = select_model<3>(seq_tsr_spctr_cst_off_k0_init,"seq_tsr_spctr_cst_off_k0"); + Model* seq_tsr_spctr_cst_off_k1 = select_model<3>(seq_tsr_spctr_cst_off_k1_init,"seq_tsr_spctr_cst_off_k1"); + Model* seq_tsr_spctr_cst_off_k2 = select_model<3>(seq_tsr_spctr_cst_off_k2_init,"seq_tsr_spctr_cst_off_k2"); + Model* seq_tsr_spctr_off_k0 = select_model<3>(seq_tsr_spctr_off_k0_init,"seq_tsr_spctr_off_k0"); + Model* seq_tsr_spctr_off_k1 = select_model<3>(seq_tsr_spctr_off_k1_init,"seq_tsr_spctr_off_k1"); + Model* seq_tsr_spctr_off_k2 = select_model<3>(seq_tsr_spctr_off_k2_init,"seq_tsr_spctr_off_k2"); + Model* seq_tsr_spctr_cst_k0 = select_model<3>(seq_tsr_spctr_cst_k0_init,"seq_tsr_spctr_cst_k0"); + Model* seq_tsr_spctr_cst_k1 = 
select_model<3>(seq_tsr_spctr_cst_k1_init,"seq_tsr_spctr_cst_k1"); + Model* seq_tsr_spctr_cst_k2 = select_model<3>(seq_tsr_spctr_cst_k2_init,"seq_tsr_spctr_cst_k2"); + Model* seq_tsr_spctr_cst_k3 = select_model<3>(seq_tsr_spctr_cst_k3_init,"seq_tsr_spctr_cst_k3"); + Model* seq_tsr_spctr_cst_k4 = select_model<3>(seq_tsr_spctr_cst_k4_init,"seq_tsr_spctr_cst_k4"); + Model* seq_tsr_spctr_cst_k5 = select_model<3>(seq_tsr_spctr_cst_k5_init,"seq_tsr_spctr_cst_k5"); + Model* seq_tsr_spctr_k0 = select_model<3>(seq_tsr_spctr_k0_init,"seq_tsr_spctr_k0"); + Model* seq_tsr_spctr_k1 = select_model<3>(seq_tsr_spctr_k1_init,"seq_tsr_spctr_k1"); + Model* seq_tsr_spctr_k2 = select_model<3>(seq_tsr_spctr_k2_init,"seq_tsr_spctr_k2"); + Model* seq_tsr_spctr_k3 = select_model<3>(seq_tsr_spctr_k3_init,"seq_tsr_spctr_k3"); + Model* seq_tsr_spctr_k4 = select_model<3>(seq_tsr_spctr_k4_init,"seq_tsr_spctr_k4"); + Model* seq_tsr_spctr_k5 = select_model<3>(seq_tsr_spctr_k5_init,"seq_tsr_spctr_k5"); double seq_tsr_spctr::est_time_fp(int nlyr, int nblk_A, int nblk_B, int nblk_C, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C){ // return COST_MEMBW*(size_A+size_B+size_C)+COST_FLOP*flops; @@ -260,61 +260,61 @@ namespace CTF_int { case 0: if (is_custom){ if (inner_params.offload) - return seq_tsr_spctr_cst_off_k0.est_time(ps); + return seq_tsr_spctr_cst_off_k0->est_time(ps); else - return seq_tsr_spctr_cst_k0.est_time(ps); + return seq_tsr_spctr_cst_k0->est_time(ps); } else { if (inner_params.offload) - return seq_tsr_spctr_off_k0.est_time(ps); + return seq_tsr_spctr_off_k0->est_time(ps); else - return seq_tsr_spctr_k0.est_time(ps); + return seq_tsr_spctr_k0->est_time(ps); } break; case 1: if (is_custom){ if (inner_params.offload) - return seq_tsr_spctr_cst_off_k1.est_time(ps); + return seq_tsr_spctr_cst_off_k1->est_time(ps); else - return seq_tsr_spctr_cst_k1.est_time(ps); + return seq_tsr_spctr_cst_k1->est_time(ps); } else { if (inner_params.offload) - return 
seq_tsr_spctr_off_k1.est_time(ps); + return seq_tsr_spctr_off_k1->est_time(ps); else - return seq_tsr_spctr_k1.est_time(ps); + return seq_tsr_spctr_k1->est_time(ps); } break; case 2: if (is_custom){ if (inner_params.offload) - return seq_tsr_spctr_cst_off_k2.est_time(ps); + return seq_tsr_spctr_cst_off_k2->est_time(ps); else - return seq_tsr_spctr_cst_k2.est_time(ps); + return seq_tsr_spctr_cst_k2->est_time(ps); } else { if (inner_params.offload) - return seq_tsr_spctr_off_k2.est_time(ps); + return seq_tsr_spctr_off_k2->est_time(ps); else - return seq_tsr_spctr_k2.est_time(ps); + return seq_tsr_spctr_k2->est_time(ps); } break; case 3: if (is_custom){ - return seq_tsr_spctr_cst_k3.est_time(ps); + return seq_tsr_spctr_cst_k3->est_time(ps); } else { - return seq_tsr_spctr_k3.est_time(ps); + return seq_tsr_spctr_k3->est_time(ps); } break; case 4: if (is_custom){ - return seq_tsr_spctr_cst_k4.est_time(ps); + return seq_tsr_spctr_cst_k4->est_time(ps); } else { - return seq_tsr_spctr_k4.est_time(ps); + return seq_tsr_spctr_k4->est_time(ps); } break; case 5: if (is_custom){ - return seq_tsr_spctr_cst_k5.est_time(ps); + return seq_tsr_spctr_cst_k5->est_time(ps); } else { - return seq_tsr_spctr_k5.est_time(ps); + return seq_tsr_spctr_k5->est_time(ps); } break; @@ -384,52 +384,52 @@ namespace CTF_int { switch (krnl_type){ case 0: if (is_custom){ - bsr = seq_tsr_spctr_cst_k0.should_observe(tps_); + bsr = seq_tsr_spctr_cst_k0->should_observe(tps_); } else { - bsr = seq_tsr_spctr_k0.should_observe(tps_); + bsr = seq_tsr_spctr_k0->should_observe(tps_); } break; case 1: if (is_custom){ if (inner_params.offload) - bsr = seq_tsr_spctr_cst_off_k1.should_observe(tps_); + bsr = seq_tsr_spctr_cst_off_k1->should_observe(tps_); else - bsr = seq_tsr_spctr_cst_k1.should_observe(tps_); + bsr = seq_tsr_spctr_cst_k1->should_observe(tps_); } else { if (inner_params.offload) - bsr = seq_tsr_spctr_off_k1.should_observe(tps_); + bsr = seq_tsr_spctr_off_k1->should_observe(tps_); else - bsr = 
seq_tsr_spctr_k1.should_observe(tps_); + bsr = seq_tsr_spctr_k1->should_observe(tps_); } break; case 2: if (is_custom){ if (inner_params.offload) - bsr = seq_tsr_spctr_cst_off_k2.should_observe(tps_); + bsr = seq_tsr_spctr_cst_off_k2->should_observe(tps_); else - bsr = seq_tsr_spctr_cst_k2.should_observe(tps_); + bsr = seq_tsr_spctr_cst_k2->should_observe(tps_); } else { if (inner_params.offload) - bsr = seq_tsr_spctr_off_k2.should_observe(tps_); + bsr = seq_tsr_spctr_off_k2->should_observe(tps_); else - bsr = seq_tsr_spctr_k2.should_observe(tps_); + bsr = seq_tsr_spctr_k2->should_observe(tps_); } break; case 3: if (is_custom){ - bsr = seq_tsr_spctr_cst_k3.should_observe(tps_); + bsr = seq_tsr_spctr_cst_k3->should_observe(tps_); } else { - bsr = seq_tsr_spctr_k3.should_observe(tps_); + bsr = seq_tsr_spctr_k3->should_observe(tps_); } break; case 4: if (is_custom){ // to-be-complete // should always observe - //seq_tsr_spctr_cst_k4.observe(tps); + //seq_tsr_spctr_cst_k4->observe(tps); bsr = true; } else { - bsr = seq_tsr_spctr_k4.should_observe(tps_); + bsr = seq_tsr_spctr_k4->should_observe(tps_); } break; } @@ -554,58 +554,58 @@ namespace CTF_int { switch (krnl_type){ case 0: if (is_custom){ - seq_tsr_spctr_cst_k0.observe(tps); + seq_tsr_spctr_cst_k0->observe(tps); } else { - seq_tsr_spctr_k0.observe(tps); + seq_tsr_spctr_k0->observe(tps); } break; case 1: if (is_custom){ if (inner_params.offload) - seq_tsr_spctr_cst_off_k1.observe(tps); + seq_tsr_spctr_cst_off_k1->observe(tps); else - seq_tsr_spctr_cst_k1.observe(tps); + seq_tsr_spctr_cst_k1->observe(tps); } else { if (inner_params.offload) - seq_tsr_spctr_off_k1.observe(tps); + seq_tsr_spctr_off_k1->observe(tps); else - seq_tsr_spctr_k1.observe(tps); + seq_tsr_spctr_k1->observe(tps); } break; case 2: if (is_custom){ if (inner_params.offload) - seq_tsr_spctr_cst_off_k2.observe(tps); + seq_tsr_spctr_cst_off_k2->observe(tps); else - seq_tsr_spctr_cst_k2.observe(tps); + seq_tsr_spctr_cst_k2->observe(tps); } else { if 
(inner_params.offload) - seq_tsr_spctr_off_k2.observe(tps); + seq_tsr_spctr_off_k2->observe(tps); else - seq_tsr_spctr_k2.observe(tps); + seq_tsr_spctr_k2->observe(tps); } break; case 3: if (is_custom){ - seq_tsr_spctr_cst_k3.observe(tps); + seq_tsr_spctr_cst_k3->observe(tps); } else { - seq_tsr_spctr_k3.observe(tps); + seq_tsr_spctr_k3->observe(tps); } break; case 4: if (is_custom){ // to-be-complete // should always observe - seq_tsr_spctr_cst_k4.observe(tps); + seq_tsr_spctr_cst_k4->observe(tps); } else { - seq_tsr_spctr_k4.observe(tps); + seq_tsr_spctr_k4->observe(tps); } break; case 5: if (is_custom){ - seq_tsr_spctr_cst_k5.observe(tps); + seq_tsr_spctr_cst_k5->observe(tps); } else { - seq_tsr_spctr_k5.observe(tps); + seq_tsr_spctr_k5->observe(tps); } break; } @@ -995,23 +995,23 @@ namespace CTF_int { } } - LinModel<2> pin_keys_mdl(pin_keys_mdl_init,"pin_keys_mdl"); + Model* pin_keys_mdl = select_model<2>(pin_keys_mdl_init,"pin_keys_mdl"); double spctr_pin_keys::est_time_fp(int nlyr, int nblk_A, int nblk_B, int nblk_C, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C) { switch (AxBxC){ case 0: { double ps[] = {1.0, dns_blk_sz*nnz_frac_A}; - return pin_keys_mdl.est_time(ps); + return pin_keys_mdl->est_time(ps); } case 1: { double ps[] = {1.0, dns_blk_sz*nnz_frac_B}; - return pin_keys_mdl.est_time(ps); + return pin_keys_mdl->est_time(ps); } case 2: { double ps[] = {1.0, dns_blk_sz*nnz_frac_C}; - return 2.*pin_keys_mdl.est_time(ps); + return 2.*pin_keys_mdl->est_time(ps); } } return 0; @@ -1085,7 +1085,7 @@ namespace CTF_int { double exe_time = MPI_Wtime()-st_time; double tps[] = {exe_time, 1.0, (double)nnz}; - pin_keys_mdl.observe(tps); + pin_keys_mdl->observe(tps); TAU_FSTOP(spctr_pin_keys); rec_ctr->run(nA, nblk_A, size_blk_A, @@ -1110,7 +1110,7 @@ namespace CTF_int { depin(sr_C, order, lens, divisor, nblk_C, virt_dim, phys_rank, new_C, new_nnz_C, size_blk_C, new_C, true); double exe_time = MPI_Wtime()-st_time; double tps[] = {exe_time, 1.0, 
(double)nnz}; - pin_keys_mdl.observe(tps); + pin_keys_mdl->observe(tps); break; } TAU_FSTOP(spctr_pin_keys); diff --git a/src/interface/common.cxx b/src/interface/common.cxx index df88ba85..98caf34c 100644 --- a/src/interface/common.cxx +++ b/src/interface/common.cxx @@ -47,19 +47,19 @@ namespace CTF_int { //static double init_mdl[] = {COST_LATENCY, COST_LATENCY, COST_NETWBW}; - LinModel<3> alltoall_mdl(alltoall_mdl_init,"alltoall_mdl"); - LinModel<3> alltoallv_mdl(alltoallv_mdl_init,"alltoallv_mdl"); + Model* alltoall_mdl = select_model<3>(alltoall_mdl_init,"alltoall_mdl"); + Model* alltoallv_mdl = select_model<3>(alltoallv_mdl_init,"alltoallv_mdl"); #ifdef BGQ //static double init_lg_mdl[] = {COST_LATENCY, COST_LATENCY, 0.0, COST_NETWBW + 2.0*COST_MEMBW}; #else //static double init_lg_mdl[] = {COST_LATENCY, COST_LATENCY, COST_NETWBW + 2.0*COST_MEMBW, 0.0}; #endif - LinModel<3> red_mdl(red_mdl_init,"red_mdl"); - LinModel<3> red_mdl_cst(red_mdl_cst_init,"red_mdl_cst"); - LinModel<3> allred_mdl(allred_mdl_init,"allred_mdl"); - LinModel<3> allred_mdl_cst(allred_mdl_cst_init,"allred_mdl_cst"); - LinModel<3> bcast_mdl(bcast_mdl_init,"bcast_mdl"); + Model* red_mdl = select_model<3>(red_mdl_init,"red_mdl"); + Model* red_mdl_cst = select_model<3>(red_mdl_cst_init,"red_mdl_cst"); + Model* allred_mdl = select_model<3>(allred_mdl_init,"allred_mdl"); + Model* allred_mdl_cst = select_model<3>(allred_mdl_cst_init,"allred_mdl_cst"); + Model* bcast_mdl = select_model<3>(bcast_mdl_init,"bcast_mdl"); template @@ -340,42 +340,42 @@ namespace CTF_int { double CommData::estimate_bcast_time(int64_t msg_sz){ double ps[] = {1.0, log2((double)np), (double)msg_sz}; - return bcast_mdl.est_time(ps); + return bcast_mdl->est_time(ps); } double CommData::estimate_allred_time(int64_t msg_sz, MPI_Op op){ double ps[] = {1.0, log2((double)np), (double)msg_sz*log2((double)(np))}; if (op >= MPI_MAX && op <= MPI_REPLACE) - return allred_mdl.est_time(ps); + return allred_mdl->est_time(ps); else - 
return allred_mdl_cst.est_time(ps); + return allred_mdl_cst->est_time(ps); } double CommData::estimate_red_time(int64_t msg_sz, MPI_Op op){ double ps[] = {1.0, log2((double)np), (double)msg_sz*log2((double)(np))}; if (op >= MPI_MAX && op <= MPI_REPLACE) - return red_mdl.est_time(ps); + return red_mdl->est_time(ps); else - return red_mdl_cst.est_time(ps); + return red_mdl_cst->est_time(ps); } /* double CommData::estimate_csrred_time(int64_t msg_sz, MPI_Op op){ double ps[] = {1.0, log2((double)np), (double)msg_sz}; if (op >= MPI_MAX && op <= MPI_REPLACE) - return csrred_mdl.est_time(ps); + return csrred_mdl->est_time(ps); else - return csrred_mdl_cst.est_time(ps); + return csrred_mdl_cst->est_time(ps); }*/ double CommData::estimate_alltoall_time(int64_t chunk_sz) { double ps[] = {1.0, log2((double)np), log2((double)np)*np*chunk_sz}; - return alltoall_mdl.est_time(ps); + return alltoall_mdl->est_time(ps); } double CommData::estimate_alltoallv_time(int64_t tot_sz) { double ps[] = {1.0, log2((double)np), log2((double)np)*tot_sz}; - return alltoallv_mdl.est_time(ps); + return alltoallv_mdl->est_time(ps); } @@ -386,7 +386,7 @@ namespace CTF_int { int tsize_; MPI_Type_size(mdtype, &tsize_); double tps_[] = {0.0, 1.0, log2(np), ((double)count)*tsize_}; - if (!bcast_mdl.should_observe(tps_)) return; + if (!bcast_mdl->should_observe(tps_)) return; #endif #ifdef TUNE @@ -399,7 +399,7 @@ namespace CTF_int { int tsize; MPI_Type_size(mdtype, &tsize); double tps[] = {exe_time, 1.0, log2(np), ((double)count)*tsize}; - bcast_mdl.observe(tps); + bcast_mdl->observe(tps); #endif } @@ -414,9 +414,9 @@ namespace CTF_int { double tps_[] = {0.0, 1.0, log2(np), ((double)count)*tsize_*std::max(.5,(double)log2(np))}; bool bsr = true; if (op >= MPI_MAX && op <= MPI_REPLACE) - bsr = allred_mdl.should_observe(tps_); + bsr = allred_mdl->should_observe(tps_); else - bsr = allred_mdl_cst.should_observe(tps_); + bsr = allred_mdl_cst->should_observe(tps_); if(!bsr) return; #endif @@ -430,9 +430,9 @@ 
namespace CTF_int { MPI_Type_size(mdtype, &tsize); double tps[] = {exe_time, 1.0, log2(np), ((double)count)*tsize*std::max(.5,(double)log2(np))}; if (op >= MPI_MAX && op <= MPI_REPLACE) - allred_mdl.observe(tps); + allred_mdl->observe(tps); else - allred_mdl_cst.observe(tps); + allred_mdl_cst->observe(tps); } void CommData::red(void * inbuf, void * outbuf, int64_t count, MPI_Datatype mdtype, MPI_Op op, int root){ @@ -445,9 +445,9 @@ namespace CTF_int { double tps_[] = {0.0, 1.0, log2(np), ((double)count)*tsize_*std::max(.5,(double)log2(np))}; bool bsr = true; if (op >= MPI_MAX && op <= MPI_REPLACE) - bsr = red_mdl.should_observe(tps_); + bsr = red_mdl->should_observe(tps_); else - bsr = red_mdl_cst.should_observe(tps_); + bsr = red_mdl_cst->should_observe(tps_); if(!bsr) return; #endif @@ -461,9 +461,9 @@ namespace CTF_int { MPI_Type_size(mdtype, &tsize); double tps[] = {exe_time, 1.0, log2(np), ((double)count)*tsize*std::max(.5,(double)log2(np))}; if (op >= MPI_MAX && op <= MPI_REPLACE) - red_mdl.observe(tps); + red_mdl->observe(tps); else - red_mdl_cst.observe(tps); + red_mdl_cst->observe(tps); } @@ -480,7 +480,7 @@ namespace CTF_int { // change-of-observe int64_t tot_sz_ = std::max(send_displs[np-1]+send_counts[np-1], recv_displs[np-1]+recv_counts[np-1])*datum_size; double tps_[] = {0.0, 1.0, log2(np), (double)tot_sz_}; - if (!alltoallv_mdl.should_observe(tps_)) return; + if (!alltoallv_mdl->should_observe(tps_)) return; #endif double st_time = MPI_Wtime(); @@ -573,7 +573,7 @@ namespace CTF_int { double exe_time = MPI_Wtime()-st_time; int64_t tot_sz = std::max(send_displs[np-1]+send_counts[np-1], recv_displs[np-1]+recv_counts[np-1])*datum_size; double tps[] = {exe_time, 1.0, log2(np), (double)tot_sz}; - alltoallv_mdl.observe(tps); + alltoallv_mdl->observe(tps); } char * get_default_inds(int order, int start_index){ diff --git a/src/redistribution/dgtog_redist.cxx b/src/redistribution/dgtog_redist.cxx index fffee025..c53e018a 100644 --- 
a/src/redistribution/dgtog_redist.cxx +++ b/src/redistribution/dgtog_redist.cxx @@ -6,11 +6,11 @@ #include "dgtog_bucket.h" namespace CTF_int { //static double init_mdl[] = {COST_LATENCY, COST_LATENCY, COST_NETWBW}; - LinModel<3> dgtog_res_mdl(dgtog_res_mdl_init,"dgtog_res_mdl"); + Model* dgtog_res_mdl = select_model<3>(dgtog_res_mdl_init,"dgtog_res_mdl"); double dgtog_est_time(int64_t tot_sz, int np){ double ps[] = {1.0, (double)log2(np), (double)tot_sz*log2(np)}; - return dgtog_res_mdl.est_time(ps); + return dgtog_res_mdl->est_time(ps); } } diff --git a/src/redistribution/dgtog_redist_ror.h b/src/redistribution/dgtog_redist_ror.h index 031b6885..69a15dc1 100644 --- a/src/redistribution/dgtog_redist_ror.h +++ b/src/redistribution/dgtog_redist_ror.h @@ -715,6 +715,6 @@ void dgtog_reshuffle(int const * sym, double tps[] = {exe_time, 1.0, (double)log2(ord_glb_comm.np), (double)std::max(old_dist.size, new_dist.size)*log2(ord_glb_comm.np)*sr->el_size}; // double-check - dgtog_res_mdl.observe(tps); + dgtog_res_mdl->observe(tps); TAU_FSTOP(dgtog_reshuffle); } diff --git a/src/redistribution/nosym_transp.cxx b/src/redistribution/nosym_transp.cxx index e312a0ca..03c63dcb 100644 --- a/src/redistribution/nosym_transp.cxx +++ b/src/redistribution/nosym_transp.cxx @@ -12,9 +12,9 @@ namespace CTF_int { //static double init_ct_ps[] = {COST_LATENCY, 1.5*COST_MEMBW}; - LinModel<2> long_contig_transp_mdl(long_contig_transp_mdl_init,"long_contig_transp_mdl"); - LinModel<2> shrt_contig_transp_mdl(shrt_contig_transp_mdl_init,"shrt_contig_transp_mdl"); - LinModel<2> non_contig_transp_mdl(non_contig_transp_mdl_init,"non_contig_transp_mdl"); + Model* long_contig_transp_mdl = select_model<2>(long_contig_transp_mdl_init,"long_contig_transp_mdl"); + Model* shrt_contig_transp_mdl = select_model<2>(shrt_contig_transp_mdl_init,"shrt_contig_transp_mdl"); + Model* non_contig_transp_mdl = select_model<2>(non_contig_transp_mdl_init,"non_contig_transp_mdl"); //#define OPT_NOSYM_TR @@ -448,11 
+448,11 @@ namespace CTF_int { double tps[] = {0.0, 1.0, (double)tot_sz}; bool should_run = true; if (contig0 < 4){ - should_run = non_contig_transp_mdl.should_observe(tps); + should_run = non_contig_transp_mdl->should_observe(tps); } else if (contig0 <= 64){ - should_run = shrt_contig_transp_mdl.should_observe(tps); + should_run = shrt_contig_transp_mdl->should_observe(tps); } else { - should_run = long_contig_transp_mdl.should_observe(tps); + should_run = long_contig_transp_mdl->should_observe(tps); } if (!should_run) return; } @@ -509,11 +509,11 @@ namespace CTF_int { double exe_time = MPI_Wtime() - st_time; double tps[] = {exe_time, 1.0, (double)tot_sz}; if (contig0 < 4){ - non_contig_transp_mdl.observe(tps); + non_contig_transp_mdl->observe(tps); } else if (contig0 <= 64){ - shrt_contig_transp_mdl.observe(tps); + shrt_contig_transp_mdl->observe(tps); } else { - long_contig_transp_mdl.observe(tps); + long_contig_transp_mdl->observe(tps); } #endif TAU_FSTOP(nosym_transpose); @@ -780,11 +780,11 @@ namespace CTF_int { //this model ignores cache-line size double ps[] = {1.0, (double)tot_sz}; if (contig0 < 4){ - return non_contig_transp_mdl.est_time(ps); + return non_contig_transp_mdl->est_time(ps); } else if (contig0 <= 64){ - return shrt_contig_transp_mdl.est_time(ps); + return shrt_contig_transp_mdl->est_time(ps); } else { - return long_contig_transp_mdl.est_time(ps); + return long_contig_transp_mdl->est_time(ps); } } diff --git a/src/redistribution/redist.cxx b/src/redistribution/redist.cxx index 2773e099..06802dd6 100644 --- a/src/redistribution/redist.cxx +++ b/src/redistribution/redist.cxx @@ -445,11 +445,11 @@ namespace CTF_int { } //static double init_mdl[] = {COST_LATENCY, COST_LATENCY, COST_NETWBW}; - LinModel<2> blres_mdl(blres_mdl_init,"blres_mdl"); + Model* blres_mdl = select_model<2>(blres_mdl_init,"blres_mdl"); double blres_est_time(int64_t tot_sz, int nv0, int nv1){ double ps[] = {(double)nv0+nv1, (double)tot_sz}; - return blres_mdl.est_time(ps); + 
return blres_mdl->est_time(ps); } void block_reshuffle(distribution const & old_dist, @@ -519,7 +519,7 @@ namespace CTF_int { tps[1] = (double)num_old_virt+num_new_virt; tps[2] = (double)std::max(new_dist.size, new_dist.size); - if (!(blres_mdl.should_observe(tps))){ + if (!(blres_mdl->should_observe(tps))){ cdealloc(idx); cdealloc(old_loc_lda); cdealloc(new_loc_lda); @@ -608,7 +608,7 @@ namespace CTF_int { tps[0] = exe_time; tps[1] = (double)num_old_virt+num_new_virt; tps[2] = (double)std::max(new_dist.size, new_dist.size); - blres_mdl.observe(tps); + blres_mdl->observe(tps); free(tps); #endif diff --git a/src/shared/model.cxx b/src/shared/model.cxx index ef17ebf4..16318984 100644 --- a/src/shared/model.cxx +++ b/src/shared/model.cxx @@ -840,4 +840,5 @@ namespace CTF_int { template class CubicModel<2>; template class CubicModel<3>; template class CubicModel<4>; + } diff --git a/src/shared/model.h b/src/shared/model.h index 2ff60885..5428f982 100644 --- a/src/shared/model.h +++ b/src/shared/model.h @@ -16,6 +16,9 @@ namespace CTF_int { class Model { public: + virtual void observe(double const * time_param); + virtual bool should_observe(double const * time_param); + virtual double est_time(double const * param); virtual void update(MPI_Comm cm){}; virtual void print(){}; virtual void print_uo(){}; @@ -34,7 +37,7 @@ namespace CTF_int { * \brief Linear performance models, which given measurements, provides new model guess */ template - class LinModel : Model { + class LinModel : public Model { private: /** \brief number of performance observations made (calls to observe() */ int64_t nobs; @@ -146,7 +149,7 @@ namespace CTF_int { * \brief Cubic performance models, which given measurements, provides new model guess */ template - class CubicModel : Model { + class CubicModel : public Model { private: LinModel lmdl; @@ -221,6 +224,41 @@ namespace CTF_int { }; + /** + * \brief creates the performance model selected by the MODEL_TYPE environment variable; only "Global" (the default when unset) is supported and yields a LinModel + * MAX_NUM_OBSERVATIONS (default 1048576) is read from the environment and passed as the third constructor argument + * \param[in] init_guess forwarded as the first model constructor argument + * \param[in] name forwarded as the second model constructor argument + * \return newly allocated model (the caller owns it); aborts if MODEL_TYPE is unsupported + */ + template <int nparam> + Model* select_model(double const * init_guess, char const * name){ + char* model_type = 
getenv("MODEL_TYPE"); + std::string model_type_str; + if (!model_type){ + model_type_str = std::string("Global"); + } else{ + model_type_str = std::string(model_type); + } + char* max_num_observations_str = getenv("MAX_NUM_OBSERVATIONS"); + int max_num_observations; + if (!max_num_observations_str){ + max_num_observations = 1048576; + } else{ + max_num_observations = atoi(max_num_observations_str); + } + + if (model_type_str == "Global"){ + return new LinModel<nparam>(init_guess,name,max_num_observations); + } else{ + // NOTE: Use of CubicModel is inaccessible. It has not been tested recently. + // assert() is compiled out under NDEBUG, so abort() explicitly as well: + // returning a null model would only defer the crash to its first use + assert(0); + abort(); + } + return nullptr; + } } #endif diff --git a/src/shared/offload.cu b/src/shared/offload.cu index 1b6a97e6..05a18352 100644 --- a/src/shared/offload.cu +++ b/src/shared/offload.cu @@ -67,17 +67,17 @@ namespace CTF_int{ /*offload_tsr::~offload_tsr(){ }*/ - LinModel<2> upload_mdl(upload_mdl_init,"upload_mdl"); - LinModel<2> download_mdl(download_mdl_init,"download_mdl"); + Model* upload_mdl = select_model<2>(upload_mdl_init,"upload_mdl"); + Model* download_mdl = select_model<2>(download_mdl_init,"download_mdl"); double estimate_download_time(int64_t size){ double ps[] = {1.0, (double)size}; - return download_mdl.est_time(ps); + return download_mdl->est_time(ps); } double estimate_upload_time(int64_t size){ double ps[] = {1.0, (double)size}; - return upload_mdl.est_time(ps); + return upload_mdl->est_time(ps); } @@ -137,7 +137,7 @@ namespace CTF_int{ cudaMemcpyDeviceToHost); double exe_time = MPI_Wtime()-st_time; double tps[] = {exe_time, 1.0, (double)nbytes}; - download_mdl.observe(tps); + download_mdl->observe(tps); TAU_FSTOP(cuda_download); assert(err == cudaSuccess); } @@ -151,7 +151,7 @@ namespace CTF_int{ double exe_time = MPI_Wtime()-st_time; double tps[] = {exe_time, 1.0, (double)nbytes}; - upload_mdl.observe(tps); + upload_mdl->observe(tps); TAU_FSTOP(cuda_upload); assert(err == cudaSuccess); } diff --git a/src/tensor/algstrct.cxx b/src/tensor/algstrct.cxx index 
855615e2..d22cf3ea 100644 --- a/src/tensor/algstrct.cxx +++ b/src/tensor/algstrct.cxx @@ -9,8 +9,8 @@ using namespace std; namespace CTF_int { - LinModel<3> csrred_mdl(csrred_mdl_init,"csrred_mdl"); - LinModel<3> csrred_mdl_cst(csrred_mdl_cst_init,"csrred_mdl_cst"); + Model* csrred_mdl = select_model<3>(csrred_mdl_init,"csrred_mdl"); + Model* csrred_mdl_cst = select_model<3>(csrred_mdl_cst_init,"csrred_mdl_cst"); template @@ -522,7 +522,7 @@ namespace CTF_int { double tps[] = {t_end, 1.0, log2((double)p), (double)sz_A}; // note-quite-sure - csrred_mdl.observe(tps); + csrred_mdl->observe(tps); TAU_FSTOP(csr_reduce); char * data = out->all_data; delete out; @@ -544,7 +544,7 @@ namespace CTF_int { double algstrct::estimate_csr_red_time(int64_t msg_sz, CommData const * cdt) const { double ps[] = {1.0, log2((double)cdt->np), (double)msg_sz}; - return csrred_mdl.est_time(ps); + return csrred_mdl->est_time(ps); } void algstrct::acc(char * b, char const * beta, char const * a, char const * alpha) const { diff --git a/src/tensor/untyped_tensor.cxx b/src/tensor/untyped_tensor.cxx index 134b01fe..81e34dd1 100644 --- a/src/tensor/untyped_tensor.cxx +++ b/src/tensor/untyped_tensor.cxx @@ -21,10 +21,10 @@ using namespace CTF; namespace CTF_int { - LinModel<3> spredist_mdl(spredist_mdl_init,"spredist_mdl"); + Model* spredist_mdl = select_model<3>(spredist_mdl_init,"spredist_mdl"); double spredist_est_time(int64_t size, int np){ double ps[] = {1.0, (double)log2(np), (double)size*log2(np)}; - return spredist_mdl.est_time(ps); + return spredist_mdl->est_time(ps); } // static const char * SY_strings[4] = {"NS", "SY", "AS", "SH"}; @@ -2987,7 +2987,7 @@ namespace CTF_int { // change-of-observe double nnz_frac_ = ((double)nnz_tot)/(old_dist.size*wrld->cdt.np); double tps_[] = {0.0, 1.0, (double)log2(wrld->cdt.np), (double)std::max(old_dist.size, new_dist.size)*log2(wrld->cdt.np)*sr->el_size*nnz_frac_}; - if (!spredist_mdl.should_observe(tps_)) return SUCCESS; + if 
(!spredist_mdl->should_observe(tps_)) return SUCCESS; double st_time = MPI_Wtime(); #endif @@ -3009,7 +3009,7 @@ namespace CTF_int { double exe_time = MPI_Wtime()-st_time; double nnz_frac = ((double)nnz_tot)/(old_dist.size*wrld->cdt.np); double tps[] = {exe_time, 1.0, (double)log2(wrld->cdt.np), (double)std::max(old_dist.size, new_dist.size)*log2(wrld->cdt.np)*sr->el_size*nnz_frac}; - spredist_mdl.observe(tps); + spredist_mdl->observe(tps); #endif } else { if (order <= 12)