diff --git a/src/gglasso/solver/ext_admm_solver.py b/src/gglasso/solver/ext_admm_solver.py index 9ad6b1a..9236e5f 100644 --- a/src/gglasso/solver/ext_admm_solver.py +++ b/src/gglasso/solver/ext_admm_solver.py @@ -4,20 +4,35 @@ import numpy as np import time -import copy import warnings +from typing import Optional from numba import njit from numba.typed import List -from gglasso.solver.ggl_helper import phiplus, prox_od_1norm, prox_2norm, prox_rank_norm +from gglasso.solver.ggl_helper import phiplus, prox_od_1norm, prox_rank_norm from gglasso.helper.ext_admm_helper import check_G -def ext_ADMM_MGL(S, lambda1, lambda2, reg , Omega_0, G,\ - X0 = None, X1 = None, tol = 1e-5 , rtol = 1e-4, stopping_criterion = 'boyd',\ - rho= 1., max_iter = 1000, verbose = False, measure = False, latent = False, mu1 = None): +def ext_ADMM_MGL(S: dict, + lambda1: float, + lambda2: float, + reg: str, + Omega_0: dict, + G: np.ndarray, + X0: Optional[dict]=None, + X1: Optional[dict]=None, + tol: float=1e-5, + rtol: float=1e-4, + stopping_criterion: str='boyd', + rho: float=1., + max_iter: int=1000, + verbose: bool=False, + measure: bool=False, + latent: bool=False, + mu1: Optional[float]=None + ): """ This is an ADMM algorithm for solving the Group Graphical Lasso problem where not all instances have the same number of dimensions, i.e. some variables are present in some instances and not in others. @@ -108,7 +123,7 @@ def ext_ADMM_MGL(S, lambda1, lambda2, reg , Omega_0, G,\ """ K = len(S.keys()) - p = np.zeros(K, dtype= int) + p = np.zeros(K, dtype=int) for k in np.arange(K): p[k] = S[k].shape[0] @@ -128,14 +143,13 @@ def ext_ADMM_MGL(S, lambda1, lambda2, reg , Omega_0, G,\ assert rho > 0, "ADMM penalization parameter must be positive." - # initialize Omega_t = Omega_0.copy() Theta_t = Omega_0.copy() L_t = dict() for k in np.arange(K): - L_t[k] = np.zeros((p[k],p[k])) + L_t[k] = np.zeros((p[k], p[k])) # helper and dual variables Lambda_t = Omega_0.copy() @@ -144,18 +158,17 @@ def ext_ADMM_MGL(S, lambda1, lambda2, reg , Omega_0, G,\ if X0 is None: X0_t = dict() for k in np.arange(K): - X0_t[k] = np.zeros((p[k],p[k])) + X0_t[k] = np.zeros((p[k], p[k])) else: X0_t = X0.copy() if X1 is None: X1_t = dict() for k in np.arange(K): - X1_t[k] = np.zeros((p[k],p[k])) + X1_t[k] = np.zeros((p[k], p[k])) else: X1_t = X1.copy() - runtime = np.zeros(max_iter) residual = np.zeros(max_iter) status = '' @@ -183,22 +196,22 @@ def ext_ADMM_MGL(S, lambda1, lambda2, reg , Omega_0, G,\ # Omega Update Omega_t_1 = Omega_t.copy() for k in np.arange(K): - W_t = Theta_t[k] - L_t[k] - X0_t[k] - (1/rho) * S[k] + W_t = Theta_t[k] - L_t[k] - X0_t[k] - (1/rho)*S[k] eigD, eigQ = np.linalg.eigh(W_t) - Omega_t[k] = phiplus(beta = 1/rho, D = eigD, Q = eigQ) + Omega_t[k] = phiplus(beta=1/rho, D=eigD, Q=eigQ) # Theta Update for k in np.arange(K): V_t = (Omega_t[k] + L_t[k] + X0_t[k] + Lambda_t[k] - X1_t[k]) * 0.5 Theta_t[k] = prox_od_1norm(V_t, lambda1[k]/(2*rho)) - #L Update + # L Update if latent: for k in np.arange(K): C_t = Theta_t[k] - X0_t[k] - Omega_t[k] C_t = (C_t.T + C_t)/2 eigD, eigQ = np.linalg.eigh(C_t) - L_t[k] = prox_rank_norm(C_t, mu1[k]/rho, D = eigD, Q = eigQ) + L_t[k] = prox_rank_norm(C_t, mu1[k]/rho, D=eigD, Q=eigQ) # Lambda Update Lambda_t_1 = Lambda_t.copy() @@ -218,10 +231,19 @@ def ext_ADMM_MGL(S, lambda1, lambda2, reg , Omega_0, G,\ # Stopping condition if stopping_criterion == 'boyd': - r_t,s_t,e_pri,e_dual = ADMM_stopping_criterion(Omega_t, Omega_t_1, Theta_t, L_t, Lambda_t, Lambda_t_1, X0_t, X1_t,\ - S, rho, p, tol, rtol, latent) + r_t, s_t, e_pri, e_dual = ADMM_stopping_criterion(Omega_t, + Omega_t_1, + Theta_t, + L_t, + Lambda_t, + Lambda_t_1, + X0_t, + X1_t, + S, + rho, p, tol, rtol, latent + ) - residual[iter_t] = max(r_t,s_t) + residual[iter_t] = max(r_t, s_t) if verbose: print(out_fmt % (iter_t,r_t,s_t,e_pri,e_dual)) @@ -231,18 +253,25 @@ def ext_ADMM_MGL(S, lambda1, lambda2, reg , Omega_0, G,\ break elif stopping_criterion == 'kkt': - eta_A = kkt_stopping_criterion(Omega_t, Theta_t, L_t, Lambda_t, dict((k, rho*v) for k,v in X0_t.items()), dict((k, rho*v) for k,v in X1_t.items()),\ - S , G, lambda1, lambda2, reg, latent, mu1) + eta_A = kkt_stopping_criterion(Omega_t, + Theta_t, + L_t, + Lambda_t, + dict((k, rho*v) for k,v in X0_t.items()), + dict((k, rho*v) for k,v in X1_t.items()), + S, + G, + lambda1, lambda2, reg, latent, mu1 + ) residual[iter_t] = eta_A if verbose: - print(out_fmt % (iter_t,eta_A)) + print(out_fmt % (iter_t, eta_A)) if eta_A <= tol: status = 'optimal' break - ################################################################## ### MAIN LOOP FINISHED ################################################################## @@ -301,7 +330,6 @@ def ADMM_stopping_criterion(Omega, Omega_t_1, Theta, L, Lambda, Lambda_t_1, X0, for k in np.arange(K): assert np.all(L[k]==0) - dim = ((p ** 2 + p) / 2).sum() # number of elements of off-diagonal matrix D1 = np.sqrt(sum([np.linalg.norm(Omega[k])**2 + np.linalg.norm(Lambda[k])**2 for k in np.arange(K)] )) @@ -311,13 +339,12 @@ def ADMM_stopping_criterion(Omega, Omega_t_1, Theta, L, Lambda, Lambda_t_1, X0, e_pri = dim * eps_abs + eps_rel * np.maximum(D1, D2) e_dual = dim * eps_abs + eps_rel * rho * D3 - r = np.sqrt(sum([np.linalg.norm(Omega[k] - Theta[k] + L[k])**2 + np.linalg.norm(Lambda[k] - Theta[k])**2 for k in np.arange(K)] )) s = rho * np.sqrt(sum([np.linalg.norm(Omega[k] - Omega_t_1[k])**2 + np.linalg.norm(Lambda[k] - Lambda_t_1[k])**2 for k in np.arange(K)] )) - return r,s,e_pri,e_dual + return r, s, e_pri, e_dual -def kkt_stopping_criterion(Omega, Theta, L, Lambda, X0, X1, S , G, lambda1, lambda2, reg, latent = False, mu1 = None): +def kkt_stopping_criterion(Omega, Theta, L, Lambda, X0, X1, S , G, lambda1, lambda2, reg, latent=False, mu1=None): # X0, X1 are inputed as UNscaled dual variables(!) K = len(S.keys()) @@ -337,26 +364,31 @@ def kkt_stopping_criterion(Omega, Theta, L, Lambda, X0, X1, S , G, lambda1, lamb eigD, eigQ = np.linalg.eigh(Omega[k] - S[k] - X0[k]) proxk = phiplus(beta = 1, D = eigD, Q = eigQ) # primal varibale optimality - term1[k] = np.linalg.norm(Omega[k] - proxk) / (1 + np.linalg.norm(Omega[k])) - term2[k] = np.linalg.norm(Theta[k] - prox_od_1norm(Theta[k] + X0[k] - X1[k] , lambda1[k])) / (1 + np.linalg.norm(Theta[k])) + term1[k] = np.linalg.norm(Omega[k] - proxk) / (1+np.linalg.norm(Omega[k])) + term2[k] = np.linalg.norm(Theta[k] - prox_od_1norm(Theta[k] + X0[k] - X1[k] , lambda1[k])) / (1+np.linalg.norm(Theta[k])) if latent: eigD, eigQ = np.linalg.eigh(L[k] - X0[k]) - proxk = prox_rank_norm(L[k] - X0[k], beta = mu1[k], D = eigD, Q = eigQ) - term3[k] = np.linalg.norm(L[k] - proxk) / (1 + np.linalg.norm(L[k])) + proxk = prox_rank_norm(L[k] - X0[k], beta=mu1[k], D=eigD, Q=eigQ) + term3[k] = np.linalg.norm(L[k] - proxk) / (1+np.linalg.norm(L[k])) V[k] = Lambda[k] + X1[k] # equality constraints - term5[k] = np.linalg.norm(Omega[k] - Theta[k] + L[k]) / (1 + np.linalg.norm(Theta[k])) - term6[k] = np.linalg.norm(Lambda[k] - Theta[k]) / (1 + np.linalg.norm(Theta[k])) - + term5[k] = np.linalg.norm(Omega[k] - Theta[k] + L[k]) / (1+np.linalg.norm(Theta[k])) + term6[k] = np.linalg.norm(Lambda[k] - Theta[k]) / (1+np.linalg.norm(Theta[k])) V = prox_2norm_G(V, G, lambda2) for k in np.arange(K): - term4[k] = np.linalg.norm(V[k] - Lambda[k]) / (1 + np.linalg.norm(Lambda[k])) - - res = max(np.linalg.norm(term1), np.linalg.norm(term2), np.linalg.norm(term3), np.linalg.norm(term4), np.linalg.norm(term5), np.linalg.norm(term6) ) + term4[k] = np.linalg.norm(V[k] - Lambda[k]) / (1+np.linalg.norm(Lambda[k])) + + res = max(np.linalg.norm(term1), + np.linalg.norm(term2), + np.linalg.norm(term3), + np.linalg.norm(term4), + np.linalg.norm(term5), + np.linalg.norm(term6) + ) return res def prox_2norm_G(X, G, l2): @@ -375,14 +407,13 @@ def prox_2norm_G(X, G, l2): assert d[0] == 2 assert d[2] == K - group_size = (G[0,:,:] != -1).sum(axis = 1) + group_size = (G[0,:,:] != -1).sum(axis=1) tmpX = List() for k in np.arange(K): tmpX.append(X[k].copy()) X1 = prox_G_inner(G, tmpX, l2, group_size) - X1 = dict(zip(np.arange(K), X1)) return X1 @@ -402,13 +433,12 @@ def prox_G_inner(G, X, l2, group_size): v0[k] = np.nan else: v0[k] = X[k][G[0,l,k], G[1,l,k]] - - + v = v0[~np.isnan(v0)] # scale with square root of the group size lam = l2 * np.sqrt(group_size[l]) a = max(np.sqrt((v**2).sum()), lam) - z0 = v * (a - lam) / a + z0 = v * (a-lam)/a v0[~np.isnan(v0)] = z0 @@ -420,52 +450,4 @@ def prox_G_inner(G, X, l2, group_size): # lower triangular X[k][G[1,l,k], G[0,l,k]] = v0[k] - return X - - -#%% -# prox operato in case numba version does not work - -# def prox_2norm_G(X, G, l2): -# """ -# calculates the proximal operator at points X for the group penalty induced by G -# G: 2xLxK matrix where the -th row contains the (i,j)-index of the element in Theta^k which contains to group l -# if G has a entry -1 no element is contained in the group for this Theta^k -# X: dictionary with X^k at key k, each X[k] is assumed to be symmetric -# """ -# assert l2 > 0 -# K = len(X.keys()) -# for k in np.arange(K): -# assert abs(X[k] - X[k].T).max() <= 1e-5, "X[k] has to be symmetric" - -# d = G.shape -# assert d[0] == 2 -# assert d[2] == K -# L = d[1] - -# X1 = copy.deepcopy(X) -# group_size = (G[0,:,:] != -1).sum(axis = 1) - -# for l in np.arange(L): -# # for each group construct v, calculate prox, and insert the result. Ignore -1 entries of G -# v0 = np.zeros(K) -# for k in np.arange(K): -# if G[0,l,k] == -1: -# v0[k] = np.nan -# else: -# v0[k] = X[k][G[0,l,k], G[1,l,k]] - -# v = v0[~np.isnan(v0)] -# # scale with square root of the group size -# z0 = prox_2norm(v,l2 * np.sqrt(group_size[l])) -# v0[~np.isnan(v0)] = z0 - -# for k in np.arange(K): -# if G[0,l,k] == -1: -# continue -# else: -# X1[k][G[0,l,k], G[1,l,k]] = v0[k] -# # lower triangular -# X1[k][G[1,l,k], G[0,l,k]] = v0[k] - -# return X1 \ No newline at end of file + return X \ No newline at end of file diff --git a/src/gglasso/solver/functional_sgl_admm.py b/src/gglasso/solver/functional_sgl_admm.py index db9b166..f5e8ac5 100755 --- a/src/gglasso/solver/functional_sgl_admm.py +++ b/src/gglasso/solver/functional_sgl_admm.py @@ -5,13 +5,26 @@ import numpy as np import time import warnings +from typing import Optional from .ggl_helper import prox_sum_Frob, phiplus, prox_rank_norm - -def ADMM_FSGL(S, lambda1, M, Omega_0, Theta_0=np.array([]), X_0=np.array([]), - rho=1., max_iter=1000, tol=1e-7, rtol=1e-4,\ - update_rho=True, verbose=False, measure=False, latent=False, mu1=None): +def ADMM_FSGL(S: np.ndarray, + lambda1: float, + M: int, + Omega_0: np.ndarray, + Theta_0: np.ndarray=np.array([]), + X_0: np.ndarray=np.array([]), + rho: float=1., + max_iter: int=1000, + tol: float=1e-7, + rtol: float=1e-4, + update_rho: bool=True, + verbose: bool=False, + measure: bool=False, + latent: bool=False, + mu1: Optional[float]=None + ): """ This is an ADMM solver for the (Latent variable) Functional Single Graphical Lasso problem (FSGL). It solves a SGL problem for the case when each of the ``p`` variables has an ``M``-dimensional functional representation (e.g. Fourier transform). @@ -80,14 +93,13 @@ def ADMM_FSGL(S, lambda1, M, Omega_0, Theta_0=np.array([]), X_0=np.array([]), assert S.shape[0] == S.shape[1] assert lambda1 > 0 - if latent: assert mu1 is not None assert mu1 > 0 - (pM,pM) = S.shape + (pM, pM) = S.shape - assert pM%M == 0 + assert pM % M == 0 p = int(pM/M) if verbose: @@ -111,7 +123,6 @@ def ADMM_FSGL(S, lambda1, M, Omega_0, Theta_0=np.array([]), X_0=np.array([]), residual = np.zeros(max_iter) status = '' - if verbose: print("------------ADMM Algorithm for Functional Single Graphical Lasso----------------") @@ -126,12 +137,11 @@ def ADMM_FSGL(S, lambda1, M, Omega_0, Theta_0=np.array([]), X_0=np.array([]), if measure: start = time.time() - # Omega Update - W_t = Theta_t - L_t - X_t - (1 / rho) * S + W_t = Theta_t - L_t - X_t - (1/rho)*S eigD, eigQ = np.linalg.eigh(W_t) Omega_t_1 = Omega_t.copy() - Omega_t = phiplus(beta=1 / rho, D=eigD, Q=eigQ) + Omega_t = phiplus(beta=1/rho, D=eigD, Q=eigQ) # Theta Update Theta_t = prox_sum_Frob(Omega_t + L_t + X_t, M, (1/rho)*lambda1) @@ -145,14 +155,19 @@ def ADMM_FSGL(S, lambda1, M, Omega_0, Theta_0=np.array([]), X_0=np.array([]), # X Update X_t = X_t + Omega_t - Theta_t + L_t - if measure: end = time.time() runtime[iter_t] = end - start # Stopping criterion - r_t,s_t,e_pri,e_dual = ADMM_stopping_criterion(Omega_t, Omega_t_1, Theta_t, L_t, X_t,\ - S, rho, tol, rtol, latent) + r_t, s_t, e_pri, e_dual = ADMM_stopping_criterion(Omega_t, + Omega_t_1, + Theta_t, + L_t, + X_t, + S, + rho, tol, rtol, latent + ) # update rho if update_rho: @@ -167,8 +182,7 @@ def ADMM_FSGL(S, lambda1, M, Omega_0, Theta_0=np.array([]), X_0=np.array([]), X_t = (rho/rho_new)*X_t rho = rho_new - - residual[iter_t] = max(r_t,s_t) + residual[iter_t] = max(r_t, s_t) if verbose: print(out_fmt % (iter_t,r_t,s_t,e_pri,e_dual,rho)) @@ -176,8 +190,6 @@ def ADMM_FSGL(S, lambda1, M, Omega_0, Theta_0=np.array([]), X_0=np.array([]), status = 'optimal' break - - ################################################################## ### MAIN LOOP FINISHED ################################################################## @@ -226,20 +238,18 @@ def ADMM_FSGL(S, lambda1, M, Omega_0, Theta_0=np.array([]), X_0=np.array([]), return sol, info - def ADMM_stopping_criterion(Omega, Omega_t_1, Theta, L, X, S, rho, eps_abs, eps_rel, latent=False): # X is inputed as scaled dual variable, this is accounted for by factor rho in e_dual if not latent: - assert np.all(L == 0) - - (pM,pM) = S.shape + assert np.all(L==0) + (pM, pM) = S.shape - dim = ((pM ** 2 + pM) / 2) # number of elements of off-diagonal matrix + dim = ((pM**2 + pM)/2) # number of elements of off-diagonal matrix e_pri = dim * eps_abs + eps_rel * np.maximum(np.linalg.norm(Omega), np.linalg.norm(Theta-L)) e_dual = dim * eps_abs + eps_rel * rho * np.linalg.norm(X) r = np.linalg.norm(Omega - Theta + L) s = rho*np.linalg.norm(Omega - Omega_t_1) - return r,s,e_pri,e_dual \ No newline at end of file + return r, s, e_pri, e_dual \ No newline at end of file diff --git a/src/gglasso/solver/old/ppdna_unjitted.py b/src/gglasso/solver/old/ppdna_unjitted.py index 021dcf2..58b71d5 100755 --- a/src/gglasso/solver/old/ppdna_unjitted.py +++ b/src/gglasso/solver/old/ppdna_unjitted.py @@ -1,3 +1,50 @@ +#%% +# prox operator in case numba version does not work + +# def prox_2norm_G(X, G, l2): +# """ +# calculates the proximal operator at points X for the group penalty induced by G +# G: 2xLxK matrix where the -th row contains the (i,j)-index of the element in Theta^k which contains to group l +# if G has a entry -1 no element is contained in the group for this Theta^k +# X: dictionary with X^k at key k, each X[k] is assumed to be symmetric +# """ +# assert l2 > 0 +# K = len(X.keys()) +# for k in np.arange(K): +# assert abs(X[k] - X[k].T).max() <= 1e-5, "X[k] has to be symmetric" + +# d = G.shape +# assert d[0] == 2 +# assert d[2] == K +# L = d[1] + +# X1 = copy.deepcopy(X) +# group_size = (G[0,:,:] != -1).sum(axis = 1) + +# for l in np.arange(L): +# # for each group construct v, calculate prox, and insert the result. Ignore -1 entries of G +# v0 = np.zeros(K) +# for k in np.arange(K): +# if G[0,l,k] == -1: +# v0[k] = np.nan +# else: +# v0[k] = X[k][G[0,l,k], G[1,l,k]] + +# v = v0[~np.isnan(v0)] +# # scale with square root of the group size +# z0 = prox_2norm(v,l2 * np.sqrt(group_size[l])) +# v0[~np.isnan(v0)] = z0 + +# for k in np.arange(K): +# if G[0,l,k] == -1: +# continue +# else: +# X1[k][G[0,l,k], G[1,l,k]] = v0[k] +# # lower triangular +# X1[k][G[1,l,k], G[0,l,k]] = v0[k] + +# return X1 + ############################ ## some old snippets befor jitting ############################ diff --git a/src/gglasso/solver/single_admm_solver.py b/src/gglasso/solver/single_admm_solver.py index 0101a3f..51dcc1e 100644 --- a/src/gglasso/solver/single_admm_solver.py +++ b/src/gglasso/solver/single_admm_solver.py @@ -7,13 +7,28 @@ from scipy.sparse.csgraph import connected_components from scipy.linalg import block_diag import warnings +from typing import Optional from .ggl_helper import prox_od_1norm, phiplus, prox_rank_norm -def ADMM_SGL(S, lambda1, Omega_0, Theta_0=np.array([]), X_0=np.array([]), - rho=1., max_iter=1000, tol=1e-7, rtol=1e-4, stopping_criterion='boyd',\ - update_rho=True, verbose=False, measure=False, latent=False, mu1=None, lambda1_mask=None): +def ADMM_SGL(S: np.ndarray, + lambda1: float, + Omega_0: np.ndarray, + Theta_0: np.ndarray=np.array([]), + X_0: np.ndarray=np.array([]), + rho: float=1., + max_iter: int=1000, + tol: float=1e-7, + rtol: float=1e-4, + stopping_criterion: str='boyd', + update_rho: bool=True, + verbose: bool=False, + measure: bool=False, + latent: bool=False, + mu1: Optional[float]=None, + lambda1_mask: Optional[np.ndarray]=None + ): """ This is an ADMM solver for the (Latent variable) Single Graphical Lasso problem (SGL). If ``latent=False``, this function solves @@ -83,8 +98,8 @@ def ADMM_SGL(S, lambda1, Omega_0, Theta_0=np.array([]), X_0=np.array([]), info : dict status and measurement information from the solver. """ - assert Omega_0.shape == S.shape - assert S.shape[0] == S.shape[1] + assert Omega_0.shape==S.shape + assert S.shape[0]==S.shape[1] (p, p) = S.shape @@ -92,13 +107,13 @@ def ADMM_SGL(S, lambda1, Omega_0, Theta_0=np.array([]), X_0=np.array([]), # use lambda1_mask if specified if lambda1_mask is not None: - assert lambda1_mask.shape == (p,p), f"lambda1_mask needs to be of shape (p,p), but is {lambda1_mask.shape}." - assert np.all(lambda1_mask >=0 ), "lambda1_mask needs to be non-negative." + assert lambda1_mask.shape==(p,p), f"lambda1_mask needs to be of shape (p,p), but is {lambda1_mask.shape}." + assert np.all(lambda1_mask>=0), "lambda1_mask needs to be non-negative." assert np.all(np.abs(lambda1_mask.T - lambda1_mask) <= 1e-5), "lambda1_mask needs to be symmetric." lambda1 = lambda1 * lambda1_mask - assert np.all(lambda1 >= 0) + assert np.all(lambda1>=0) assert stopping_criterion in ["boyd", "kkt"] @@ -111,9 +126,9 @@ def ADMM_SGL(S, lambda1, Omega_0, Theta_0=np.array([]), X_0=np.array([]), # initialize Omega_t = Omega_0.copy() - if len(Theta_0) == 0: + if len(Theta_0)==0: Theta_0 = Omega_0.copy() - if len(X_0) == 0: + if len(X_0)==0: X_0 = np.zeros((p, p)) Theta_t = Theta_0.copy() @@ -124,7 +139,6 @@ def ADMM_SGL(S, lambda1, Omega_0, Theta_0=np.array([]), X_0=np.array([]), residual = np.zeros(max_iter) status = '' - if verbose: print("------------ADMM Algorithm for Single Graphical Lasso----------------") @@ -149,31 +163,34 @@ def ADMM_SGL(S, lambda1, Omega_0, Theta_0=np.array([]), X_0=np.array([]), W_t = Theta_t - L_t - X_t - (1 / rho) * S eigD, eigQ = np.linalg.eigh(W_t) Omega_t_1 = Omega_t.copy() - Omega_t = phiplus(beta=1 / rho, D=eigD, Q=eigQ) + Omega_t = phiplus(beta=1/rho, D=eigD, Q=eigQ) # Theta Update - Theta_t = prox_od_1norm(Omega_t + L_t + X_t, (1 / rho) * lambda1) + Theta_t = prox_od_1norm(Omega_t + L_t + X_t, (1/rho) * lambda1) # L Update if latent: C_t = Theta_t - X_t - Omega_t - # C_t = (C_t.T + C_t)/2 eigD1, eigQ1 = np.linalg.eigh(C_t) L_t = prox_rank_norm(C_t, mu1/rho, D=eigD1, Q=eigQ1) # X Update X_t = X_t + Omega_t - Theta_t + L_t - - if measure: end = time.time() runtime[iter_t] = end - start # Stopping criterion if stopping_criterion == 'boyd': - r_t,s_t,e_pri,e_dual = ADMM_stopping_criterion(Omega_t, Omega_t_1, Theta_t, L_t, X_t,\ - S, rho, tol, rtol, latent) + r_t, s_t, e_pri, e_dual = ADMM_stopping_criterion(Omega_t, + Omega_t_1, + Theta_t, + L_t, + X_t, + S, + rho, tol, rtol, latent + ) # update rho if update_rho: @@ -187,27 +204,25 @@ def ADMM_SGL(S, lambda1, Omega_0, Theta_0=np.array([]), X_0=np.array([]), # rescale dual variables X_t = (rho/rho_new)*X_t rho = rho_new - - + residual[iter_t] = max(r_t,s_t) if verbose: - print(out_fmt % (iter_t,r_t,s_t,e_pri,e_dual)) + print(out_fmt % (iter_t, r_t, s_t, e_pri, e_dual)) if (r_t <= e_pri) and (s_t <= e_dual): status = 'optimal' break elif stopping_criterion == 'kkt': - eta_A = kkt_stopping_criterion(Omega_t, Theta_t, L_t, rho * X_t, S, lambda1, latent, mu1) + eta_A = kkt_stopping_criterion(Omega_t, Theta_t, L_t, rho*X_t, S, lambda1, latent, mu1) residual[iter_t] = eta_A if verbose: - print(out_fmt % (iter_t,eta_A)) + print(out_fmt % (iter_t, eta_A)) if eta_A <= tol: status = 'optimal' break - ################################################################## ### MAIN LOOP FINISHED ################################################################## @@ -266,7 +281,6 @@ def ADMM_stopping_criterion(Omega, Omega_t_1, Theta, L, X, S, rho, eps_abs, eps_ (p, p) = S.shape - dim = ((p ** 2 + p) / 2) # number of elements of off-diagonal matrix e_pri = dim * eps_abs + eps_rel * np.maximum(np.linalg.norm(Omega), np.linalg.norm(Theta -L)) e_dual = dim * eps_abs + eps_rel * rho * np.linalg.norm(X) @@ -274,7 +288,7 @@ def ADMM_stopping_criterion(Omega, Omega_t_1, Theta, L, X, S, rho, eps_abs, eps_ r = np.linalg.norm(Omega - Theta + L) s = rho*np.linalg.norm(Omega - Omega_t_1) - return r,s,e_pri,e_dual + return r, s, e_pri, e_dual def kkt_stopping_criterion(Omega, Theta, L, X, S, lambda1, latent=False, mu1=None): assert Omega.shape == Theta.shape == S.shape @@ -285,19 +299,20 @@ def kkt_stopping_criterion(Omega, Theta, L, X, S, lambda1, latent=False, mu1=Non (p, p) = S.shape - term1 = np.linalg.norm(Theta - prox_od_1norm(Theta + X, l=lambda1)) / (1 + np.linalg.norm(Theta)) + term1 = np.linalg.norm(Theta - prox_od_1norm(Theta+X, + l=lambda1)) / (1+np.linalg.norm(Theta)) - term2 = np.linalg.norm(Omega - Theta + L) / (1 + np.linalg.norm(Theta)) + term2 = np.linalg.norm(Omega - Theta + L) / (1+np.linalg.norm(Theta)) eigD, eigQ = np.linalg.eigh(Omega - S - X) proxO = phiplus(beta=1, D=eigD, Q=eigQ) - term3 = np.linalg.norm(Omega - proxO) / (1 + np.linalg.norm(Omega)) + term3 = np.linalg.norm(Omega - proxO) / (1+np.linalg.norm(Omega)) term4 = 0 if latent: eigD, eigQ = np.linalg.eigh(L - X) - proxL = prox_rank_norm(A=L - X, beta=mu1, D=eigD, Q=eigQ) - term4 = np.linalg.norm(L - proxL) / (1 + np.linalg.norm(L)) + proxL = prox_rank_norm(A=L-X, beta=mu1, D=eigD, Q=eigQ) + term4 = np.linalg.norm(L - proxL) / (1+np.linalg.norm(L)) residual = max(term1, term2, term3, term4) @@ -308,9 +323,21 @@ def kkt_stopping_criterion(Omega, Theta, L, X, S, lambda1, latent=False, mu1=Non ## BLOCK-WISE GRAPHICAL LASSO AFTER WITTEN ET AL. ####################################################### -def block_SGL(S, lambda1, Omega_0, Theta_0=None, X_0=None, rho=1., max_iter=1000, - tol=1e-7, rtol=1e-3, stopping_criterion="boyd", - update_rho=True, verbose=False, measure=False, lambda1_mask=None): +def block_SGL(S: np.ndarray, + lambda1: float, + Omega_0: np.ndarray, + Theta_0: Optional[np.ndarray]=None, + X_0: Optional[np.ndarray]=None, + rho: float=1., + max_iter: int=1000, + tol: float=1e-7, + rtol: float=1e-3, + stopping_criterion: str="boyd", + update_rho: bool=True, + verbose: bool=False, + measure: bool=False, + lambda1_mask: Optional[np.ndarray]=None + ): """ This is a wrapper for solving SGL problems on connected components of the solution and solving each block separately. See Witten, Friedman, Simon "New Insights for the Graphical Lasso" for details. @@ -380,11 +407,11 @@ def block_SGL(S, lambda1, Omega_0, Theta_0=None, X_0=None, rho=1., max_iter=1000 # use lambda1_mask if specified if lambda1_mask is not None: - assert lambda1_mask.shape == (p,p), f"lambda1_mask needs to be of shape (p,p), but is {lambda1_mask.shape}." + assert lambda1_mask.shape == (p, p), f"lambda1_mask needs to be of shape (p,p), but is {lambda1_mask.shape}." assert np.all(lambda1_mask >= 0), "lambda1_mask needs to be non-negative." assert np.all(np.abs(lambda1_mask.T - lambda1_mask) <= 1e-5), "lambda1_mask needs to be symmetric." else: - lambda1_mask = np.ones((p,p)) + lambda1_mask = np.ones((p, p)) if Theta_0 is None: Theta_0 = Omega_0.copy() @@ -410,16 +437,26 @@ def block_SGL(S, lambda1, Omega_0, Theta_0=None, X_0=None, rho=1., max_iter=1000 allTheta.append(closed_sol) allX.append(np.array([0])) - # else solve Graphical Lasso for the corresponding block else: block_S = S[np.ix_(C, C)] - this_lambda1_mask = lambda1_mask[np.ix_(C,C)] + this_lambda1_mask = lambda1_mask[np.ix_(C, C)] - block_sol, block_info = ADMM_SGL(S=block_S, lambda1=lambda1, Omega_0=Omega_0[np.ix_(C, C)], - Theta_0=Theta_0[np.ix_(C, C)], X_0=X_0[np.ix_(C, C)], tol=tol, rtol=rtol, - stopping_criterion=stopping_criterion, update_rho=update_rho, - rho=rho, max_iter=max_iter, verbose=verbose, measure=measure, lambda1_mask=this_lambda1_mask) + block_sol, block_info = ADMM_SGL(S=block_S, + lambda1=lambda1, + Omega_0=Omega_0[np.ix_(C, C)], + Theta_0=Theta_0[np.ix_(C, C)], + X_0=X_0[np.ix_(C, C)], + tol=tol, + rtol=rtol, + stopping_criterion=stopping_criterion, + update_rho=update_rho, + rho=rho, + max_iter=max_iter, + verbose=verbose, + measure=measure, + lambda1_mask=this_lambda1_mask + ) allOmega.append(block_sol['Omega']) allTheta.append(block_sol['Theta']) @@ -448,12 +485,10 @@ def get_connected_components(S, lambda1): for i in range(numC): # need hstack for avoiding redundant dimensions thisC = np.hstack(np.argwhere(labelsC == i)) - allC.append(thisC) return numC, allC - def invert_permutation(p): """The argument p is assumed to be some permutation of 0, 1, ..., len(p)-1. Returns an array s, where s[i] gives the index of i in p.