diff --git a/ALS/als_sp.py b/ALS/als_sp.py index fa292cf..a9ce30f 100644 --- a/ALS/als_sp.py +++ b/ALS/als_sp.py @@ -17,49 +17,63 @@ CG_thresh = 1.e-10 sparse_format = True -def CG(Ax0,b,x0,f1,f2,r,regParam,omega,I,string): - - t_cg_conv = ctf.timer("ALS_cg_conv") - t_cg_conv.start() +class implicit_ATA: + def __init__(self, f1, f2, omega, string): + self.f1 = f1 + self.f2 = f2 + self.omega = omega + self.string = string + + def mul(self, idx, sk): + if self.string=="U": + return self.f1.i("J"+idx[1]) \ + *self.f2.i("K"+idx[1]) \ + *ctf.TTTP(self.omega, [sk, self.f1, self.f2]).i(idx[0]+"JK") + if self.string=="V": + return self.f1.i("I"+idx[1]) \ + *self.f2.i("K"+idx[1]) \ + *ctf.TTTP(self.omega, [self.f1, sk, self.f2]).i("I"+idx[0]+"K") + if self.string=="W": + return self.f1.i("I"+idx[1]) \ + *self.f2.i("J"+idx[1]) \ + *ctf.TTTP(self.omega, [self.f1, self.f2, sk]).i("IJ"+idx[0]) + +def CG(A,b,x0,r,regParam,I,is_implicit=False): + + t_batch_cg = ctf.timer("ALS_exp_cg") + t_batch_cg.start() + Ax0 = ctf.tensor((I,r)) + if is_implicit: + Ax0.i("ir") << A.mul("ir",x0) + else: + Ax0.i("ir") << A.i("irl")*x0.i("il") + Ax0 += regParam*x0 rk = b - Ax0 sk = rk xk = x0 for i in range(sk.shape[-1]): # how many iterations? Ask = ctf.tensor((I,r)) - t_cg_TTTP = ctf.timer("ALS_cg_TTTP") - t_cg_TTTP.start() - if (string=="U"): - t0 = time.time() - #Ask.i("ir") << f1.i("Jr")*f2.i("Kr")*omega.i("iJK")*f1.i("JR")*f2.i("KR")*sk.i("iR") - Ask.i("ir") << f1.i("Jr")*f2.i("Kr")*ctf.TTTP(omega,[sk,f1,f2]).i("iJK") - t1 = time.time() - if ctf.comm().rank == 0 and status_prints == True: - print('form Ask takes {}'.format(t1-t0)) - if (string=="V"): - #Ask.i("jr") << f1.i("Ir")*f2.i("Kr")*omega.i("IjK")*f1.i("IR")*f2.i("KR")*sk.i("jR") - Ask.i("jr") << f1.i("Ir")*f2.i("Kr")*ctf.TTTP(omega,[f1,sk,f2]).i("IjK") - if (string=="W"): - #Ask.i("kr") << f1.i("Ir")*f2.i("Jr")*omega.i("IJk")*f1.i("IR")*f2.i("JR")*sk.i("kR") - Ask.i("kr") << f1.i("Ir")*f2.i("Jr")*ctf.TTTP(omega,[f1,f2,sk]).i("IJk") - t_cg_TTTP.stop() + t_cg_bmvec = ctf.timer("ALS_exp_cg_mvec") + t_cg_bmvec.start() + t0 = time.time() + if is_implicit: + Ask.i("ir") << A.mul("ir",sk) + else: + Ask.i("ir") << A.i("irl")*sk.i("il") + t1 = time.time() + if ctf.comm().rank == 0 and status_prints == True: + print('form Ask takes {}'.format(t1-t0)) + t_cg_bmvec.stop() Ask += regParam*sk rnorm = ctf.tensor(I) rnorm.i("i") << rk.i("ir") * rk.i("ir") - #print("rnorm",rnorm.to_nparray()) - - #for i in range(I): - # if rnorm[i] < 1.e-16: - - # break skAsk = ctf.tensor(I) skAsk.i("i") << sk.i("ir") * Ask.i("ir") - #if (rnorm[i] < 1.e-30): - # continue alpha = rnorm/(skAsk + 1.e-30) alphask = ctf.tensor((I,r)) @@ -73,8 +87,6 @@ def CG(Ax0,b,x0,f1,f2,r,regParam,omega,I,string): rk1norm = ctf.tensor(I) rk1norm.i("i") << rk1.i("ir") * rk1.i("ir") - #if (rk1norm[i] < 1.e-30): - # continue beta = rk1norm/(rnorm+ 1.e-30) betask = ctf.tensor((I,r)) @@ -86,181 +98,141 @@ def CG(Ax0,b,x0,f1,f2,r,regParam,omega,I,string): if ctf.vecnorm(rk) < CG_thresh: break - #print("CG residual after",sk.shape[-1],"iterations is",ctf.vecnorm(rk)) + #print("explicit CG residual after",sk.shape[-1],"iterations is",ctf.vecnorm(rk)) - t_cg_conv.stop() + t_batch_cg.stop() return xk -def updateFactor_CG(T,U,V,W,regParam,omega,I,J,K,r,block,string): - - t_RHS = ctf.timer("ALS_cg_RHS") - t_cg_TTTP = ctf.timer("ALS_cg_TTTP") - t_o_slice = ctf.timer("ALS_omega_slice") - if (string=="U"): +def solve_SVD(A,b,factor,r,regParam): + [U_,S_,VT_] = ctf.svd(A) + S_ = 1/(S_+regParam*ctf.ones(S_.shape)) + factor.set_zero() + factor.i("r") << VT_.i("kr")*S_.i("k")*U_.i("tk")*b.i("t") + + return factor - size = int(I/block) - for n in range(block): +def updateFactor(T,U,V,W,regParam,omega,I,J,K,r,block_size,string,use_implicit): - t0 = time.time() + t_RHS = ctf.timer("ALS_imp_cg_RHS") + t_cg_TTTP = ctf.timer("ALS_imp_cg_TTTP") + t_o_slice = ctf.timer("ALS_imp_omega_slice") + t_form_EQs = ctf.timer("ALS_exp_form_EQs") + t_form_RHS = ctf.timer("ALS_exp_form_RHS") + if (string=="U"): + num_blocks = int((I+block_size-1)/block_size) + for n in range(num_blocks): + I_start = n*block_size + I_end = min(I,I_start+block_size) + bsize = I_end-I_start t_o_slice.start() - nomega = omega[n*size : (n+1)*size,:,:] + nomega = omega[I_start : I_end,:,:] t_o_slice.stop() assert(nomega.sp == sparse_format) - t1 = time.time() - - x0 = ctf.random.random((size,r)) - Ax0 = ctf.tensor((size,r)) - t_cg_TTTP.start() - #Ax0.i("ir") << V.i("Jr")*W.i("Kr")*nomega.i("iJK")*V.i("JR")*W.i("KR")*x0.i("iR") # LHS; ATA using matrix-vector multiplication - Ax0.i("ir") << V.i("Jr")*W.i("Kr")*ctf.TTTP(nomega, [x0,V,W]).i("iJK") - t_cg_TTTP.stop() - Ax0 += regParam*x0 - t2 = time.time() - - b = ctf.tensor((size,r)) + x0 = ctf.random.random((bsize,r)) + b = ctf.tensor((bsize,r)) t_RHS.start() - b.i("ir") << V.i("Jr")*W.i("Kr")*T[n*size : (n+1)*size,:,:].i("iJK") # RHS; ATb + b.i("ir") << V.i("Jr")*W.i("Kr")*T[I_start : I_end,:,:].i("iJK") # RHS; ATb t_RHS.stop() - t3 = time.time() - - if ctf.comm().rank() == 0 and status_prints == True: - print('slicing Omega takes {}'.format(t1 - t0)) - print('form Ax0 takes {}'.format(t2 - t1)) - print('form b takes {}'.format(t3 - t2)) - #U[n*size : (n+1)*size,:].set_zero() - U[n*size : (n+1)*size,:] = CG(Ax0,b,x0,V,W,r,regParam,nomega,size,"U") - + if use_implicit: + Ax0 = ctf.tensor((bsize,r)) + t_cg_TTTP.start() + Ax0.i("ir") << V.i("Jr")*W.i("Kr")*ctf.TTTP(nomega, [x0,V,W]).i("iJK") + t_cg_TTTP.stop() + Ax0 += regParam*x0 + U[I_start : I_end,:] = CG(implicit_ATA(V,W,nomega,"U"),b,x0,r,regParam,bsize,True) + else: + A = ctf.tensor((bsize,r,r)) + t_form_EQs.start() + A.i("iuv") << V.i("Ju")*W.i("Ku") * nomega.i("iJK")*V.i("Jv")*W.i("Kv") + t_form_EQs.stop() + U[I_start : I_end,:] = CG(A,b,x0,r,regParam,bsize) + return U if (string=="V"): - size = int(J/block) - for n in range(block): + num_blocks = int((J+block_size-1)/block_size) + for n in range(num_blocks): + J_start = n*block_size + J_end = min(J,J_start+block_size) + bsize = J_end-J_start t_o_slice.start() - nomega = omega[:,n*size : (n+1)*size,:] + nomega = omega[:,J_start : J_end,:] t_o_slice.stop() - x0 = ctf.random.random((size,r)) - Ax0 = ctf.tensor((size,r)) - t_cg_TTTP.start() - #Ax0.i("jr") << U.i("Ir")*W.i("Kr")*nomega.i("IjK")*U.i("IR")*W.i("KR")*x0.i("jR") # LHS; ATA using matrix-vector multiplication - Ax0.i("jr") << U.i("Ir")*W.i("Kr")*ctf.TTTP(nomega, [U,x0,W]).i("IjK") - t_cg_TTTP.stop() - Ax0 += regParam*x0 - b = ctf.tensor((size,r)) + x0 = ctf.random.random((bsize,r)) + b = ctf.tensor((bsize,r)) t_RHS.start() - b.i("jr") << U.i("Ir")*W.i("Kr")*T[:,n*size : (n+1)*size,:].i("IjK") # RHS; ATb + b.i("jr") << U.i("Ir")*W.i("Kr")*T[:,J_start : J_end,:].i("IjK") # RHS; ATb t_RHS.stop() - #V[n*size : (n+1)*size,:].set_zero() - V[n*size : (n+1)*size,:] = CG(Ax0,b,x0,U,W,r,regParam,nomega,size,"V") + if use_implicit: + Ax0 = ctf.tensor((bsize,r)) + t_cg_TTTP.start() + Ax0.i("jr") << U.i("Ir")*W.i("Kr")*ctf.TTTP(nomega, [U,x0,W]).i("IjK") + t_cg_TTTP.stop() + Ax0 += regParam*x0 + V[J_start : J_end,:] = CG(implicit_ATA(U,W,nomega,"V"),b,x0,r,regParam,bsize,True) + else: + A = ctf.tensor((bsize,r,r)) + t_form_EQs.start() + A.i("juv") << U.i("Iu")*W.i("Ku") * nomega.i("IjK") * U.i("Iv")*W.i("Kv") + t_form_EQs.stop() + V[J_start : J_end,:] = CG(A,b,x0,r,regParam,bsize) - return V if (string=="W"): - size = int(K/block) - for n in range(block): + num_blocks = int((K+block_size-1)/block_size) + for n in range(num_blocks): + K_start = n*block_size + K_end = min(K,K_start+block_size) + bsize = K_end-K_start t_o_slice.start() - nomega = omega[:,:,n*size : (n+1)*size] + nomega = omega[:,:,K_start : K_end] t_o_slice.stop() - x0 = ctf.random.random((size,r)) - Ax0 = ctf.tensor((size,r)) - t_cg_TTTP.start() - #Ax0.i("kr") << U.i("Ir")*V.i("Jr")*nomega.i("IJk")*U.i("IR")*V.i("JR")*x0.i("kR") # LHS; ATA using matrix-vector multiplication - Ax0.i("kr") << U.i("Ir")*V.i("Jr")*ctf.TTTP(nomega, [U,V,x0]).i("IJk") - t_cg_TTTP.stop() - Ax0 += regParam*x0 - b = ctf.tensor((size,r)) + x0 = ctf.random.random((bsize,r)) + b = ctf.tensor((bsize,r)) t_RHS.start() - b.i("kr") << U.i("Ir")*V.i("Jr")* T[:,:,n*size : (n+1)*size].i("IJk") # RHS; ATb + b.i("kr") << U.i("Ir")*V.i("Jr")* T[:,:,K_start : K_end].i("IJk") # RHS; ATb t_RHS.stop() - W[n*size : (n+1)*size,:] = CG(Ax0,b,x0,U,V,r,regParam,nomega,size,"W") + if use_implicit: + Ax0 = ctf.tensor((bsize,r)) + t_cg_TTTP.start() + Ax0.i("kr") << U.i("Ir")*V.i("Jr")*ctf.TTTP(nomega, [U,V,x0]).i("IJk") + t_cg_TTTP.stop() + Ax0 += regParam*x0 + W[K_start : K_end,:] = CG(implicit_ATA(U,V,nomega,"W"),b,x0,r,regParam,bsize,True) + else: + A = ctf.tensor((bsize,r,r)) + t_form_EQs.start() + A.i("kuv") << U.i("Iu")*V.i("Ju")*nomega.i("IJk")*U.i("Iv")*V.i("Jv") # LHS; ATA using matrix-vector multiplication + t_form_EQs.stop() + W[K_start : K_end,:] = CG(A,b,x0,r,regParam,bsize) return W - -def Kressner_SVD(A,b,factor,r,regParam): - [U_,S_,VT_] = ctf.svd(A) - S_ = 1/(S_+regParam*ctf.ones(S_.shape)) - factor.set_zero() - factor.i("r") << VT_.i("kr")*S_.i("k")*U_.i("tk")*b.i("t") - - return factor - -def Kressner(A,b,factor,r,regParam): +def solve(A,b,factor,r,regParam): n = A.shape[0] L = ctf.cholesky(A+regParam*ctf.eye(n)) factor = ctf.solve_tri(L,b.reshape((n,1)),True,True,False) factor = ctf.solve_tri(L,factor,True,True,True).reshape((n,)) - #[U_,S_,VT_] = ctf.svd(A) - #S_ = 1/(S_+regParam*ctf.ones(S_.shape)) - #factor.set_zero() - #factor.i("r") << VT_.i("kr")*S_.i("k")*U_.i("tk")*b.i("t") - return factor -def updateFactor_Kressner(T,U,V,W,regParam,omega,I,J,K,r,string): - - t = time.time() - - t_form_EQs = ctf.timer("ALS_form_EQs") - t_form_RHS = ctf.timer("ALS_form_RHS") - - if (string=="U"): - for i in range(I): - A = ctf.tensor((r,r)) - t_form_EQs.start() - A.i("uv") << V.i("Ju")*W.i("Ku") * omega[i,:,:].i("JK")*V.i("Jv")*W.i("Kv") - t_form_EQs.stop() - - b = ctf.tensor(r) - sliced_T = T[i,:,:] - t_form_RHS.start() - b.i("r") << V.i("Jr")*W.i("Kr") * sliced_T.i("JK") # RHS; ATb - t_form_RHS.stop() - U[i,:]= Kressner(A,b,U[i,:],r,regParam) - - return U - - if (string=="V"): - for j in range(J): - A = ctf.tensor((r,r)) - t_form_EQs.start() - A.i("uv") << U.i("Iu")*W.i("Ku") * omega[:,j,:].i("IK") * U.i("Iv")*W.i("Kv") - t_form_EQs.stop() - b = ctf.tensor(r) - t_form_RHS.start() - b.i("r") << U.i("Ir")*W.i("Kr") * T[:,j,:].i("IK") # RHS; ATb - t_form_EQs.stop() - V[j,:] = Kressner(A,b,V[j,:],r,regParam) - - return V - - if (string=="W"): - for k in range(K): - A= ctf.tensor((r,r)) - t_form_EQs.start() - A.i("uv") << U.i("Iu")*V.i("Ju")*omega[:,:,k].i("IJ")*U.i("Iv")*V.i("Jv") # LHS; ATA using matrix-vector multiplication - t_form_EQs.stop() - b = ctf.tensor(r) - t_form_RHS.start() - b.i("r") << U.i("Ir")*V.i("Jr") * T[:,:,k].i("IJ") # RHS; ATb - t_form_RHS.stop() - W[k,:] = Kressner(A,b,W[k,:],r,regParam) - - return W - - -def getALS_CG(T,U,V,W,regParam,omega,I,J,K,r,block,num_iter=100,err_thresh=.001): +def getALS_CG(T,U,V,W,regParam,omega,I,J,K,r,block_size,num_iter=100,err_thresh=.001,use_implicit=True): - if ctf.comm().rank() == 0: - print("--------------------------------ALS iterative CG------------------------") t_before_loop = time.time() - t_ALS_CG = ctf.timer_epoch("als_CG") + if use_implicit == True: + t_ALS_CG = ctf.timer_epoch("als_CG_implicit") + if ctf.comm().rank() == 0: + print("--------------------------------ALS with implicit CG------------------------") + else: + t_ALS_CG = ctf.timer_epoch("als_CG_explicit") + if ctf.comm().rank() == 0: + print("--------------------------------ALS with explicit CG------------------------") t_ALS_CG.begin() it = 0 @@ -281,14 +253,15 @@ def getALS_CG(T,U,V,W,regParam,omega,I,J,K,r,block,num_iter=100,err_thresh=.001) print('ctf.TTTP() takes {}'.format(t1-t0)) print('ctf.vecnorm {}'.format(t2-t1)) + ctf.random.seed(42) while True: t_upd_cg = ctf.timer("ALS_upd_cg") t_upd_cg.start() - U = updateFactor_CG(T,U,V,W,regParam,omega,I,J,K,r,block,"U") - V = updateFactor_CG(T,U,V,W,regParam,omega,I,J,K,r,block,"V") - W = updateFactor_CG(T,U,V,W,regParam,omega,I,J,K,r,block,"W") + U = updateFactor(T,U,V,W,regParam,omega,I,J,K,r,block_size,"U",use_implicit) + V = updateFactor(T,U,V,W,regParam,omega,I,J,K,r,block_size,"V",use_implicit) + W = updateFactor(T,U,V,W,regParam,omega,I,J,K,r,block_size,"W",use_implicit) E.set_zero() #E.i("ijk") << T.i("ijk") - omega.i("ijk")*U.i("iu")*V.i("ju")*W.i("ku") @@ -306,7 +279,6 @@ def getALS_CG(T,U,V,W,regParam,omega,I,J,K,r,block,num_iter=100,err_thresh=.001) break curr_err_norm = next_err_norm - t_ALS_CG.end() @@ -316,49 +288,6 @@ def getALS_CG(T,U,V,W,regParam,omega,I,J,K,r,block,num_iter=100,err_thresh=.001) -def getALS_Kressner(T,U,V,W,regParam,omega,I,J,K,r,num_iter=100,err_thresh=.001): - - if ctf.comm().rank() == 0: - print("--------------------------------ALS direct SVD------------------------") - t_ALS_Kressner = ctf.timer_epoch("als_Kressner") - t_ALS_Kressner.begin() - - t_before_loop = time.time() - it = 0 - E = ctf.tensor((I,J,K),sp=sparse_format) - #E.i("ijk") << T.i("ijk") - omega.i("ijk")*U.i("iu")*V.i("ju")*W.i("ku") - E.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U,V,W]).i("ijk") - assert(E.sp == sparse_format) - curr_err_norm = ctf.vecnorm(E) + (ctf.vecnorm(U) + ctf.vecnorm(V) + ctf.vecnorm(W))*regParam - - while True: - - t = time.time() - - U = updateFactor_Kressner(T,U,V,W,regParam,omega,I,J,K,r,"U") - V = updateFactor_Kressner(T,U,V,W,regParam,omega,I,J,K,r,"V") - W = updateFactor_Kressner(T,U,V,W,regParam,omega,I,J,K,r,"W") - - E.set_zero() - #E.i("ijk") << T.i("ijk") - omega.i("ijk")*U.i("iu")*V.i("ju")*W.i("ku") - E.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U,V,W]).i("ijk") - assert(E.sp == sparse_format) - next_err_norm = ctf.vecnorm(E) + (ctf.vecnorm(U) + ctf.vecnorm(V) + ctf.vecnorm(W))*regParam - - if ctf.comm().rank() == 0: - print("Last residual:",curr_err_norm,"New residual",next_err_norm) - it +=1 - - if abs(curr_err_norm - next_err_norm) < err_thresh or it > num_iter: - break - curr_err_norm = next_err_norm - - t_ALS_Kressner.end() - if glob_comm.rank() == 0: - print('Time/Iteration: {}'.format((time.time() - t_before_loop)/1)) - print("Number of iterations: %d" % (it)) - - def getOmega_old(T,I,J,K): if (T.sp==False): @@ -384,9 +313,11 @@ def main(): r = 2 sparsity = .000001 regParam = .1 - block = 100 + block_size = 100 num_iter = 20 err_thresh = .001 + run_implicit = 1 + run_explicit = 1 if len(sys.argv) >= 4: I = int(sys.argv[1]) J = int(sys.argv[2]) @@ -396,16 +327,20 @@ def main(): if len(sys.argv) >= 6: r = int(sys.argv[5]) if len(sys.argv) >= 7: - block = int(sys.argv[6]) + block_size = int(sys.argv[6]) if len(sys.argv) >= 8: num_iter = int(sys.argv[7]) if len(sys.argv) >= 9: err_thresh = np.float64(sys.argv[8]) if len(sys.argv) >= 10: regParam= np.float64(sys.argv[9]) + if len(sys.argv) >= 11: + run_implicit= int(sys.argv[10]) + if len(sys.argv) >= 12: + run_explicit= int(sys.argv[11]) if glob_comm.rank() == 0: - print("I is",I,"J is",J,"K is",K,"sparisty is",sparsity,"r is",r,"block size is",block,"num_iter is",num_iter,"err_thresh is",err_thresh,"regParam is",regParam) + print("I is",I,"J is",J,"K is",K,"sparisty is",sparsity,"r is",r,"block_size is",block_size,"num_iter is",num_iter,"err_thresh is",err_thresh,"regParam is",regParam,"run_implicit",run_implicit,"run_explicit is",run_explicit) # 3rd-order tensor @@ -431,9 +366,10 @@ def main(): W_CG = ctf.copy(W_SVD) T_CG = ctf.copy(T_SVD) - - getALS_CG(T_CG,U_CG,V_CG,W_CG,regParam,omega,I,J,K,r,block,num_iter,err_thresh) - getALS_Kressner(T_SVD,U_SVD,V_SVD,W_SVD,regParam,omega,I,J,K,r,num_iter,err_thresh) + if run_implicit == True: + getALS_CG(T_CG,U_CG,V_CG,W_CG,regParam,omega,I,J,K,r,block_size,num_iter,err_thresh,True) + if run_explicit == True: + getALS_CG(T_SVD,U_SVD,V_SVD,W_SVD,regParam,omega,I,J,K,r,block_size,num_iter,err_thresh,False) diff --git a/CCD/ccd_sp.py b/CCD/ccd_sp.py index ead8461..f57c574 100644 --- a/CCD/ccd_sp.py +++ b/CCD/ccd_sp.py @@ -242,6 +242,7 @@ def main(): # R += ctf.TTTP(omega, [U[:,f+1], V[:,f+1], W[:,f+1]]) if f+1 < r: R += ctf.TTTP(omega, [U_vec_list[f+1], V_vec_list[f+1], W_vec_list[f+1]]) + t_tttp.stop() assert(R.sp) # print(time.time() - t0)