Skip to content

Commit

Permalink
Merge pull request #16 from Advestis/refactoring
Browse files Browse the repository at this point in the history
Random seed initialization moved to NMTF super class init
  • Loading branch information
chgeissler authored Nov 28, 2023
2 parents a38b309 + 57dd480 commit ea0d552
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 73 deletions.
19 changes: 10 additions & 9 deletions adnmtf/nmtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def __init__(
self.leverage = leverage
self.random_state = random_state
self.verbose = verbose
np.random.seed(random_state)

def fit_transform(
self,
Expand Down Expand Up @@ -260,9 +261,9 @@ def fit_transform(
nmf_calculate_leverage = 0
nmf_use_robust_leverage = 0

if self.random_state is not None:
random_seed = self.random_state
np.random.seed(random_seed)
# if self.random_state is not None:
# random_seed = self.random_state
# np.random.seed(random_seed)

_, mt, mw, mb, mt_pct, mw_pct, diff, add_message, err_message, cancel_pressed = r_ntf_solve(
m=m,
Expand Down Expand Up @@ -516,9 +517,9 @@ def fit_transform(
ntf_left_components = self.apply_left
ntf_right_components = self.apply_right
ntf_block_components = self.apply_block
if self.random_state is not None:
random_seed = self.random_state
np.random.seed(random_seed)
# if self.random_state is not None:
# random_seed = self.random_state
# np.random.seed(random_seed)

my_status_box = get_status_box()(verbose=log_iter)

Expand Down Expand Up @@ -600,9 +601,9 @@ def fit_transform(
nmf_calculate_leverage = 0
nmf_use_robust_leverage = 0

if self.random_state is not None:
random_seed = self.random_state
np.random.seed(random_seed)
# if self.random_state is not None:
# random_seed = self.random_state
# np.random.seed(random_seed)

if update_w:
nmf_fix_user_lhe = 0
Expand Down
68 changes: 4 additions & 64 deletions adnmtf/nmtf_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,23 +68,18 @@ def nmf_init(m, mmis, mt0, mw0, nc) -> Tuple[np.ndarray, np.ndarray]:
mw = np.copy(mw0)
if (mt.shape[0] == 0) or (mw.shape[0] == 0):
# Note that if there are missing values, the SVD is performed on the matrix with missing entries imputed as zeros
np.random.seed(3)
if nc >= min(n, p):
# ARPACK cannot factorize at full rank -> the matrix is duplicated along both dimensions to make it work
# noinspection PyTypeChecker
t, d, w = svds(
np.concatenate((np.concatenate((m, m), axis=1), np.concatenate((m, m), axis=1)), axis=0),
k=nc,
v0=np.random.uniform(size=2 * min(n, p)),
random_state=0
)
d /= 2
# svd causes mem allocation problem with large matrices
# t, d, w = np.linalg.svd(m)
# mt = t
# mw = w.T
else:
t, d, w = svds(m, k=nc, v0=np.random.uniform(size=min(n, p)))
# t, d, w = np.linalg.svd(m)
t, d, w = svds(m, k=nc, v0=np.random.uniform(size=min(n, p)), random_state=0)

mt = t[:n, :nc]
mw = w[:nc, :p].T
Expand Down Expand Up @@ -124,9 +119,6 @@ def nmf_init(m, mmis, mt0, mw0, nc) -> Tuple[np.ndarray, np.ndarray]:
mw = mw * ((mt.T @ m) / ((mt.T @ mt) @ mw.T + precision)).T
mt = mt * (m @ mw / (mt @ (mw.T @ mw) + precision))

# np.savetxt("C:/Users/paul_/PycharmProjects/nmtf_private/tests/data/datatest_W.csv", mt)
# np.savetxt("C:/Users/paul_/PycharmProjects/nmtf_private/tests/data/datatest_H.csv", mw)

return mt, mw


Expand All @@ -139,40 +131,14 @@ def init_ntf_type_1(m, mmis, n_blocks, nc, mt_nmf, mw_nmf, tolerance, log_iter,
else:
mt_nmf, mw_nmf = nmf_init(m=mstacked, mmis=mmis_stacked, mt0=mt_nmf, mw0=mw_nmf, nc=nc2)

# Quick NMF (canceled since multiplicative warm-up added in nmf_init)
# _, mt_nmf, mw_nmf, mb, diff, cancel_pressed = ntf_solve(
# m=mstacked,
# mmis=mmis_stacked,
# mt0=mt_nmf,
# mw0=mw_nmf,
# mb0=np.array([]),
# nc=nc2,
# tolerance=tolerance,
# log_iter=log_iter,
# status0=status0,
# max_iterations=10,
# nmf_fix_user_lhe=0,
# nmf_fix_user_rhe=0,
# nmf_fix_user_bhe=1,
# nmf_sparse_level=0,
# ntf_unimodal=0,
# ntf_smooth=0,
# ntf_left_components=0,
# ntf_right_components=0,
# ntf_block_components=0,
# n_blocks=1,
# nmf_priors=np.array([]),
# my_status_box=my_status_box,
# )

# Factorize Left vectors and distribute multiple factors if nc2 < nc
mt = np.zeros((n, nc))
mw = np.zeros((int(p / n_blocks), nc))
mb = np.zeros((n_blocks, nc))
n_fact = int(np.ceil(nc / n_blocks))
for k in range(0, nc2):
my_status_box.update_status(status="Start SVD...")
u, d, v = svds(np.reshape(mt_nmf[:, k], (int(p / n_blocks), n)).T, k=n_fact)
u, d, v = svds(np.reshape(mt_nmf[:, k], (int(p / n_blocks), n)).T, k=n_fact, random_state=0)
v = v.T
# svds returns singular vectors in reverse order
u = u[:, ::-1]
Expand Down Expand Up @@ -228,32 +194,6 @@ def init_ntf_type_2(
else:
mt_nmf, mw_nmf = nmf_init(m=m, mmis=mmis, mt0=mt_nmf, mw0=mw_nmf, nc=nc)

# Quick NMF (canceled since multiplicative warm-up added in nmf_init)
# _, mt_nmf, mw_nmf, mb, diff, cancel_pressed = ntf_solve(
# m=m,
# mmis=mmis,
# mt0=mt_nmf,
# mw0=mw_nmf,
# mb0=np.array([]),
# nc=nc,
# tolerance=tolerance,
# log_iter=log_iter,
# status0=status0,
# max_iterations=10,
# nmf_fix_user_lhe=0,
# nmf_fix_user_rhe=0,
# nmf_fix_user_bhe=1,
# nmf_sparse_level=0,
# ntf_unimodal=0,
# ntf_smooth=0,
# ntf_left_components=0,
# ntf_right_components=0,
# ntf_block_components=0,
# n_blocks=1,
# nmf_priors=np.array([]),
# my_status_box=my_status_box,
# )

# Factorize Right vectors
mt = np.zeros((n, nc))
mw = np.zeros((int(p / n_blocks), nc))
Expand All @@ -262,7 +202,7 @@ def init_ntf_type_2(
for k in range(0, nc):
my_status_box.update_status(status="Start SVD...")
# noinspection PyTypeChecker
u, d, v = svds(np.reshape(mw_nmf[:, k], (int(p / n_blocks), n_blocks)), k=1)
u, d, v = svds(np.reshape(mw_nmf[:, k], (int(p / n_blocks), n_blocks)), k=1, random_state=0)
v = v.T
u = np.abs(u)
v = np.abs(v)
Expand Down

0 comments on commit ea0d552

Please sign in to comment.