Skip to content

Commit

Permalink
clean-up and new test file
Browse files Browse the repository at this point in the history
  • Loading branch information
martinroyer committed Apr 14, 2018
1 parent 10ee2db commit 5107b10
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 54 deletions.
9 changes: 5 additions & 4 deletions pecok/admm.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def proj_Snp_imp(Y):
return Y


def pecok_admm(relational_data, K, n_iter_max=-1):
def pecok_admm(relational_data, K, n_iter_max=-1, rho=1, mat_init=None):
"""Implementation of Alternating Direction Method of Multipliers
Parameters
Expand All @@ -58,10 +58,11 @@ def pecok_admm(relational_data, K, n_iter_max=-1):
n_samples,_ = relational_data.shape
if n_iter_max < 0:
n_iter_max = np.max((1000,2*n_samples))
rho = 10.0
relational_data = relational_data / np.linalg.norm(relational_data)

X, Y, Z = np.identity(n_samples), np.identity(n_samples), np.identity(n_samples)
if mat_init is not None:
X, Y, Z = mat_init, mat_init, mat_init
X = X + 0.2*np.random.random((n_samples, n_samples))
U, V, W = np.zeros((n_samples,n_samples)), np.zeros((n_samples,n_samples)), np.zeros((n_samples,n_samples))
Xbar = (X + Y + Z)/3
Expand Down Expand Up @@ -90,8 +91,8 @@ def pecok_admm(relational_data, K, n_iter_max=-1):


def is_primal_high(res_primal, X, Y, Z):
return res_primal > 1e-4 * np.max((np.linalg.norm(X), np.linalg.norm(Y), np.linalg.norm(Z)))
return res_primal > 1e-3 * np.max((np.linalg.norm(X), np.linalg.norm(Y), np.linalg.norm(Z)))


def is_dual_high(res_dual, Y, Z):
return res_dual > 1e-4 * (np.sqrt(Y.shape[0]) + np.linalg.norm(Y) + np.linalg.norm(Z))
return res_dual > 1e-3 * (np.sqrt(Y.shape[0]) + np.linalg.norm(Y) + np.linalg.norm(Z))
26 changes: 0 additions & 26 deletions pecok/pecok_clustering.py

This file was deleted.

74 changes: 74 additions & 0 deletions tests/test_clustering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""Test for variable and point clustering"""

# author: Martin Royer <[email protected]>
# License: MIT

import numpy as np
from sklearn import cluster
import scipy.sparse.linalg as ssl

from pecok import gamma, admm


def hierarchial_clustering(obs, n_struct):
hclustering = cluster.AgglomerativeClustering(linkage='ward', n_clusters=n_struct)
return hclustering.fit(obs, n_struct)


def kmeans_clustering(obs, n_struct):
k_means = cluster.KMeans(n_clusters=n_struct, init='k-means++', n_init=100)
return k_means.fit(obs)


def spectral_clustering(obs, n_struct):
approx, _, _ = ssl.svds(obs, k=n_struct)
return hierarchial_clustering(approx, n_struct)


def pecok_clustering(obs, n_struct, rho=5):
gram_corrected = (obs.T.dot(obs) - np.diag(gamma.gamma_hat4(obs.T))) / obs.shape[0]
U, _, V = ssl.svds(gram_corrected, k=n_struct)
Bhat = admm.pecok_admm(gram_corrected, K=n_struct, rho=rho, mat_init=U.dot(V))
return hierarchial_clustering(Bhat, n_struct=n_struct)


seed = 432
np.random.seed(seed)
print("seed is %i" % seed)

n_var = 10
n_obs = 100

print("\nVAR CLUSTERING\n\n")

truth = np.asmatrix(np.concatenate((np.repeat(0, n_var//2), np.repeat(1, n_var//2))))
membership = truth.T.dot(np.matrix([1, 0])) + (1-truth).T.dot(np.matrix([0, 1]))
stds = np.ones(n_var)
stds[:(n_var//2)] = 0.1
sigma = membership.dot(0.1*np.identity(2)).dot(membership.T) + np.diag(stds)
mat_data = np.random.multivariate_normal(mean=np.zeros(n_var), cov=sigma, size=n_obs)
gram_data = mat_data.T.dot(mat_data) / mat_data.shape[0]

print("truth:".ljust(15), truth)
print("hierarchical:".ljust(15), hierarchial_clustering(mat_data.T, n_struct=2).labels_)
print("kmeans:".ljust(15), kmeans_clustering(mat_data.T, n_struct=2).labels_)
print("spectral:".ljust(15), spectral_clustering(gram_data, n_struct=2).labels_)
print("pecok:".ljust(15), pecok_clustering(mat_data, n_struct=2).labels_)

print("\nPOINT CLUSTERING\n\n")

n_var = 100
n_obs = 10

truth = np.asmatrix(np.concatenate((np.repeat(0, n_obs//2), np.repeat(1, n_obs//2))))
X = np.zeros((n_obs, n_var))
snr = 0.3
X[:n_obs//2, :] = np.ones(n_var)*snr + np.random.normal(scale=1, size=(n_obs//2, n_var))
X[n_obs//2:, :] = -np.ones(n_var)*snr + np.random.normal(scale=0.1, size=(n_obs//2, n_var))
gram = X.dot(X.T) / X.shape[1]

print("truth:".ljust(15), truth)
print("hierarchical:".ljust(15), hierarchial_clustering(X, n_struct=2).labels_)
print("kmeans:".ljust(15), kmeans_clustering(X, n_struct=2).labels_)
print("spectral:".ljust(15), spectral_clustering(gram, n_struct=2).labels_)
print("pecok:".ljust(15), pecok_clustering(X.T, n_struct=2).labels_)
24 changes: 0 additions & 24 deletions tests/test_pointclustering.py

This file was deleted.

0 comments on commit 5107b10

Please sign in to comment.