From af50ba3e3354c5a01a2a10e330c21b4a2a6373e0 Mon Sep 17 00:00:00 2001
From: Brandon Istenes
Date: Fri, 26 Jun 2020 17:39:17 -0700
Subject: [PATCH] Add auto-formatting on commit

---
 format.sh             |   4 ++
 graphpca/__init__.py  | 106 ++++++++++++++++++++++++------------
 hooks/pre-commit      |   4 ++
 requirements.txt      |   1 +
 setup.sh              |   1 +
 test/test_graphpca.py |  89 ++++++++++++++++++++++-------------
 6 files changed, 126 insertions(+), 79 deletions(-)
 create mode 100755 format.sh
 create mode 100755 hooks/pre-commit

diff --git a/format.sh b/format.sh
new file mode 100755
index 0000000..1297ff2
--- /dev/null
+++ b/format.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+cd "$(dirname "$0")"
+./env/bin/black graphpca/*.py test/*.py
diff --git a/graphpca/__init__.py b/graphpca/__init__.py
index 22042b4..bd1d655 100644
--- a/graphpca/__init__.py
+++ b/graphpca/__init__.py
@@ -42,8 +42,9 @@ def reduce_graph(nx_graph, output_dim):
     return reduce_graph_efficiently(nx_graph, output_dim, add_supernode=True)
 
 
-def reduce_graph_efficiently(nx_graph, output_dim, add_supernode=False,
-                             eigendecomp_strategy='smart'):
+def reduce_graph_efficiently(
+    nx_graph, output_dim, add_supernode=False, eigendecomp_strategy="smart"
+):
     """
     Run PCA on the ETCD of the input NetworkX graph
 
@@ -90,38 +91,38 @@ def reduce_graph_efficiently(nx_graph, output_dim, add_supernode=False,
     :class:`numpy.ndarray`
         The reduced data in output_dim dimensions
     """
-    LOG.debug('Entering reduce_graph')
+    LOG.debug("Entering reduce_graph")
     assert output_dim < len(nx_graph)
-    LOG.info('Calculating Laplacian L')
+    LOG.info("Calculating Laplacian L")
     L = nx.laplacian_matrix(nx_graph)
-    LOG.debug('L.shape: {}'.format(L.shape))
+    LOG.debug("L.shape: {}".format(L.shape))
     if add_supernode:
         L = _add_supernode_to_laplacian(L)
-    LOG.info('Calculating nullity of L as connected components of nx_graph')
+    LOG.info("Calculating nullity of L as connected components of nx_graph")
     nullity = nx.number_connected_components(nx_graph)
-    LOG.info('Calculating smallest eigenvalues of L & corresponding eigenvectors')
-    (E, U) = _eigendecomp(eigendecomp_strategy, L, output_dim + nullity, which='SM')
-    LOG.debug('Eigenvalues: {}'.format(E))
-    LOG.info('Assembling PCA result')
+    LOG.info("Calculating smallest eigenvalues of L & corresponding eigenvectors")
+    (E, U) = _eigendecomp(eigendecomp_strategy, L, output_dim + nullity, which="SM")
+    LOG.debug("Eigenvalues: {}".format(E))
+    LOG.info("Assembling PCA result")
     # If we added a supernode, now remove it
     if add_supernode:
         # Remove data row
         U = U[:-1, :]
         # Remove eigenpair with negative value, which correspond to supernode
         neg_indexes = np.where(E < 0.0)
-        LOG.debug('Neg indexes: {}'.format(neg_indexes))
+        LOG.debug("Neg indexes: {}".format(neg_indexes))
         E = np.delete(E, neg_indexes)
         U = np.delete(U, neg_indexes, axis=1)
     # Remove the 0 eigenvalues and corresponding eigenvectors
     # Use tolerance value 10 x from numpy.linalg.matrix_rank
     tol = E.max() * max(L.shape) * np.finfo(float).eps * 10
-    LOG.debug('Using tolerance {}'.format(tol))
+    LOG.debug("Using tolerance {}".format(tol))
    zero_indexes = [i for i in range(len(E)) if abs(E[i]) < tol]
     E = np.delete(E, zero_indexes)
     U = np.delete(U, zero_indexes, axis=1)
     # Invert eigenvalues to get largest eigenvalues of L-pseudoinverse
-    Ep = 1/E
-    LOG.debug('Filtered & Inverted Eigenvalues: {}'.format(Ep))
+    Ep = 1 / E
+    LOG.debug("Filtered & Inverted Eigenvalues: {}".format(Ep))
     # Orient Eigenvectors
     _orient_eigenvectors(U)
     # Assemble into the right structure
@@ -132,7 +133,7 @@ def reduce_graph_efficiently(nx_graph, output_dim, add_supernode=False,
     return X
 
 
-def reduce_graph_naively(nx_graph, output_dim, eigendecomp_strategy='exact'):
+def reduce_graph_naively(nx_graph, output_dim, eigendecomp_strategy="exact"):
     """
     Run PCA on the ETCD of a NetworkX graph using a slow but precise method
 
@@ -165,17 +166,19 @@ def reduce_graph_naively(nx_graph, output_dim, eigendecomp_strategy='exact'):
     :class:`numpy.ndarray`
         The reduced data in output_dim dimensions
     """
-    LOG.debug('Entering naive_reduce_graph')
+    LOG.debug("Entering naive_reduce_graph")
     L = nx.laplacian_matrix(nx_graph).todense()
-    LOG.info('Calculating Moore-Penrose inverse of the Laplacian L')
+    LOG.info("Calculating Moore-Penrose inverse of the Laplacian L")
     Li = np.linalg.pinv(L)
-    LOG.info('Calculating largest eigenvalues of L-inverse & corresponding eigenvectors')
-    (E, U) = _eigendecomp(eigendecomp_strategy, Li, output_dim, which='LM')
+    LOG.info(
+        "Calculating largest eigenvalues of L-inverse & corresponding eigenvectors"
+    )
+    (E, U) = _eigendecomp(eigendecomp_strategy, Li, output_dim, which="LM")
     # Flip so largest eigen first
     E = E[::-1]
     U = np.fliplr(U)
-    LOG.debug('Eigenvalues: {}'.format(E))
-    LOG.info('Assembling PCA result')
+    LOG.debug("Eigenvalues: {}".format(E))
+    LOG.info("Assembling PCA result")
     # Assemble into the right structure
     X = np.zeros((output_dim, len(nx_graph)))
     sqrtE = np.sqrt(E)
@@ -185,7 +188,7 @@ def reduce_graph_naively(nx_graph, output_dim, eigendecomp_strategy='exact'):
 
 
 def _add_supernode_to_laplacian(L):
-    L_padded = np.ones([n+1 for n in L.shape])
+    L_padded = np.ones([n + 1 for n in L.shape])
     L_padded[:-1, :-1] = L.todense()
     return L_padded
 
@@ -195,9 +198,9 @@ def _orient_eigenvectors(U):
     for i in range(U.shape[1]):
         try:
             if next(u for u in U[:, i] if np.fabs(u) > threshold) < 0.0:
-                U[:, i] = - U[:, i]
+                U[:, i] = -U[:, i]
         except StopIteration:
-            LOG.debug('Zero eigenvector at index {}'.format(i))
+            LOG.debug("Zero eigenvector at index {}".format(i))
             continue
     return U
 
@@ -235,9 +238,9 @@ def _eigendecomp(eigendecomp_strategy, M, output_dim, which, *args, **kwargs):
         The corresponding eigenvectors of M
 
     """
-    if eigendecomp_strategy == 'exact':
+    if eigendecomp_strategy == "exact":
         return _exact_eigendecomp(M, output_dim, which)
-    elif eigendecomp_strategy == 'sparse':
+    elif eigendecomp_strategy == "sparse":
         return _sparse_eigendecomp(M, output_dim, which, *args, **kwargs)
     else:
         if M.shape[0] < 1000:
@@ -247,50 +250,58 @@ def _eigendecomp(eigendecomp_strategy, M, output_dim, which, *args, **kwargs):
 
 
 def _exact_eigendecomp(M, output_dim, which):
-    LOG.debug('Using _exact_eigendecomp')
+    LOG.debug("Using _exact_eigendecomp")
     if scipy.sparse.issparse(M):
         M = M.todense()
     E, U = scipy.linalg.eigh(M)
     # Cut out eigenpairs
-    if which == 'SM':
+    if which == "SM":
         E = E[:output_dim]
         U = U[:, :output_dim]
         U = _orient_eigenvectors(U)
-    elif which == 'LM':
-        E = E[E.shape[0] - output_dim:]
-        U = U[:, U.shape[1] - output_dim:]
+    elif which == "LM":
+        E = E[E.shape[0] - output_dim :]
+        U = U[:, U.shape[1] - output_dim :]
         U = _orient_eigenvectors(U)
     else:
-        raise NotImplementedError('Unknown setting for `which`: {}'.format(which))
+        raise NotImplementedError("Unknown setting for `which`: {}".format(which))
     return E, U
 
 
 def _sparse_eigendecomp(M, output_dim, which, tol=0.000000001, _attempt=0, **kwargs):
-    LOG.debug('Using _sparse_eigendecomp')
+    LOG.debug("Using _sparse_eigendecomp")
     try:
-        M = M.astype('d')
-        if which == 'SM':
+        M = M.astype("d")
+        if which == "SM":
             # Use shift-invert method to calculate smallest eigenpairs.
             # Use very small sigma since `sigma=0.0` fails with
             # RuntimeError: Factor is exactly singular
-            E, U = scipy.sparse.linalg.eigsh(M, output_dim, sigma=0.00001,
-                                             which='LM', tol=tol, **kwargs)
+            E, U = scipy.sparse.linalg.eigsh(
+                M, output_dim, sigma=0.00001, which="LM", tol=tol, **kwargs
+            )
         else:
-            E, U = scipy.sparse.linalg.eigsh(M, output_dim, which=which, tol=tol, **kwargs)
+            E, U = scipy.sparse.linalg.eigsh(
+                M, output_dim, which=which, tol=tol, **kwargs
+            )
         U = _orient_eigenvectors(U)
         return E, U
     except ArpackNoConvergence as e:
         if _attempt > 2:
-            LOG.error('Eigendecomp did not converge. Bailing.')
-            raise e
+            LOG.error("Eigendecomp did not converge. Bailing.")
+            raise e
         LOG.info(e)
         new_tol = tol * 10
-        LOG.info('Eigendecomp failed to converge, retrying with tolerance {}'.format(new_tol))
-        return _sparse_eigendecomp(M, output_dim, which=which, tol=new_tol, _attempt=_attempt+1)
+        LOG.info(
+            "Eigendecomp failed to converge, retrying with tolerance {}".format(new_tol)
+        )
+        return _sparse_eigendecomp(
+            M, output_dim, which=which, tol=new_tol, _attempt=_attempt + 1
+        )
 
 
-def plot_2d(pca_output_2d, colormap_name='winter'):
+def plot_2d(pca_output_2d, colormap_name="winter"):
     import matplotlib.pyplot as plt
+
     x = pca_output_2d[0, :]
     y = pca_output_2d[1, :]
     colormap = plt.get_cmap(colormap_name)
@@ -312,9 +323,12 @@ def draw_graph(nx_graph):
         The graph to be plotted
     """
     import matplotlib.pyplot as plt
+
     reduced_2 = reduce_graph(nx_graph, 2)
     for edge in nx_graph.edges():
-        plt.plot([reduced_2[0, edge[0]], reduced_2[0, edge[1]]],
-                 [reduced_2[1, edge[0]], reduced_2[1, edge[1]]],
-                 'b-')
+        plt.plot(
+            [reduced_2[0, edge[0]], reduced_2[0, edge[1]]],
+            [reduced_2[1, edge[0]], reduced_2[1, edge[1]]],
+            "b-",
+        )
     plot_2d(reduced_2)
diff --git a/hooks/pre-commit b/hooks/pre-commit
new file mode 100755
index 0000000..6d4e66b
--- /dev/null
+++ b/hooks/pre-commit
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+$(git rev-parse --show-toplevel)/format.sh
+git update-index --again
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index da5baff..b972813 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ matplotlib
 
 # Development
 docutils
+black
diff --git a/setup.sh b/setup.sh
index 15048e5..2fe5cc8 100755
--- a/setup.sh
+++ b/setup.sh
@@ -3,3 +3,4 @@
 python3 -m venv env
 ./env/bin/pip install --upgrade pip
 ./env/bin/pip install -r requirements.txt
+ln -s ../../hooks/pre-commit .git/hooks/
\ No newline at end of file
diff --git a/test/test_graphpca.py b/test/test_graphpca.py
index 04dcfde..d1fe280 100644
--- a/test/test_graphpca.py
+++ b/test/test_graphpca.py
@@ -14,10 +14,12 @@
 
 
 def get_fixture_mat(filename):
-    return scipy.io.loadmat(os.path.dirname(os.path.realpath(__file__)) + "/" + filename)
+    return scipy.io.loadmat(
+        os.path.dirname(os.path.realpath(__file__)) + "/" + filename
+    )
 
 
-class TestGraphPCA(unittest.TestCase):
+class TestGraphPCA(unittest.TestCase):
     def test_returns_plausible_results(self):
         g = nx.erdos_renyi_graph(100, 0.3)
         g_5 = graphpca.reduce_graph_efficiently(g, 5)
@@ -39,75 +41,96 @@ def test_ok_if_multiple_zero_eigens(self):
             max_val = max(abs(g_5[i]))
             self.assertGreater(max_val, 0.01)
 
-    @unittest.skip('This fails and I have no idea why')
+    @unittest.skip("This fails and I have no idea why")
     def test_similar_output_to_naive_peterson(self):
         G = nx.petersen_graph()
         G2 = graphpca.reduce_graph_efficiently(G, 2)
         G2n = graphpca.reduce_graph_naively(G, 2)
-        self.assertTrue(np.allclose(G2, G2n, rtol=1e-04, atol=1e-06),
-                        'Regular result:\n{}\nNaive result:\n{}\n'.format(G2, G2n))
+        self.assertTrue(
+            np.allclose(G2, G2n, rtol=1e-04, atol=1e-06),
+            "Regular result:\n{}\nNaive result:\n{}\n".format(G2, G2n),
+        )
 
     def test_similar_output_to_naive_small(self):
         G = nx.erdos_renyi_graph(10, 0.5)
         G2 = graphpca.reduce_graph_efficiently(G, 2)
         G2n = graphpca.reduce_graph_naively(G, 2)
-        self.assertTrue(np.allclose(G2, G2n, rtol=1e-04, atol=1e-06),
-                        'Regular result:\n{}\nNaive result:\n{}\n'.format(G2, G2n))
+        self.assertTrue(
+            np.allclose(G2, G2n, rtol=1e-04, atol=1e-06),
+            "Regular result:\n{}\nNaive result:\n{}\n".format(G2, G2n),
+        )
 
     def test_similar_output_to_naive_mat_3(self):
-        mat = get_fixture_mat('bcspwr01.mat')
+        mat = get_fixture_mat("bcspwr01.mat")
         # I love the UFSMC (https://www.cise.ufl.edu/research/sparse/matrices/)
         # but wow they really buried the matrix in this .mat
-        A = mat['Problem'][0][0][1].todense()
+        A = mat["Problem"][0][0][1].todense()
         G = nx.from_numpy_matrix(A)
         G3 = graphpca.reduce_graph_efficiently(G, 3)
         G3n = graphpca.reduce_graph_naively(G, 3)
-        self.assertTrue(np.allclose(G3, G3n, rtol=1e-04, atol=1e-06),
-                        'Regular result:\n{}\nNaive result:\n{}\n'.format(G3, G3n))
+        self.assertTrue(
+            np.allclose(G3, G3n, rtol=1e-04, atol=1e-06),
+            "Regular result:\n{}\nNaive result:\n{}\n".format(G3, G3n),
+        )
 
     def test_similar_output_to_naive_big(self):
         G = nx.erdos_renyi_graph(1001, 0.02)
         G2 = graphpca.reduce_graph_efficiently(G, 2)
         G2n = graphpca.reduce_graph_naively(G, 2)
-        self.assertTrue(np.allclose(G2, G2n, rtol=1e-03, atol=1e-05),
-                        'Regular result:\n{}\nNaive result:\n{}\n'.format(G2, G2n))
+        self.assertTrue(
+            np.allclose(G2, G2n, rtol=1e-03, atol=1e-05),
+            "Regular result:\n{}\nNaive result:\n{}\n".format(G2, G2n),
+        )
 
     def test_add_supernode_similar_output_to_naive_small(self):
         G = nx.erdos_renyi_graph(10, 0.5)
         G2 = graphpca.reduce_graph_efficiently(G, 2, add_supernode=True)
         G2n = graphpca.reduce_graph_naively(G, 2)
-        self.assertTrue(np.allclose(G2, G2n, rtol=1e-02, atol=1e-06),
-                        'Regular result:\n{}\nNaive result:\n{}\n'.format(G2, G2n))
+        self.assertTrue(
+            np.allclose(G2, G2n, rtol=1e-02, atol=1e-06),
+            "Regular result:\n{}\nNaive result:\n{}\n".format(G2, G2n),
+        )
 
     def test_add_supernode_similar_output_to_naive_mat_3(self):
-        mat = get_fixture_mat('bcspwr01.mat')
-        A = mat['Problem'][0][0][1].todense()
+        mat = get_fixture_mat("bcspwr01.mat")
+        A = mat["Problem"][0][0][1].todense()
         G = nx.from_numpy_matrix(A)
         G3 = graphpca.reduce_graph_efficiently(G, 3, add_supernode=True)
         G3n = graphpca.reduce_graph_naively(G, 3)
-        self.assertTrue(np.allclose(G3, G3n, rtol=1e-02, atol=1e-06),
-                        'Regular result:\n{}\nNaive result:\n{}\n'.format(G3, G3n))
+        self.assertTrue(
+            np.allclose(G3, G3n, rtol=1e-02, atol=1e-06),
+            "Regular result:\n{}\nNaive result:\n{}\n".format(G3, G3n),
+        )
 
     def test_add_supernode_similar_output_to_naive_big(self):
         G = nx.watts_strogatz_graph(1001, 10, 0.05)
         G2 = graphpca.reduce_graph_efficiently(G, 2, add_supernode=True)
         G2n = graphpca.reduce_graph_naively(G, 2)
-        self.assertTrue(np.allclose(G2, G2n, rtol=1e-01, atol=1e-02),
-                        'Regular result:\n{}\nNaive result:\n{}\n'.format(G2, G2n))
+        self.assertTrue(
+            np.allclose(G2, G2n, rtol=1e-01, atol=1e-02),
+            "Regular result:\n{}\nNaive result:\n{}\n".format(G2, G2n),
+        )
 
     def test_exact_eigendomp_same_as_sparse(self):
         g = nx.erdos_renyi_graph(10, 0.5)
-        l = nx.laplacian_matrix(g).astype('d')
+        l = nx.laplacian_matrix(g).astype("d")
         # Test for smallest eigs
-        Eb, Ub = graphpca._sparse_eigendecomp(l, 4, which='SM')
-        Es, Us = graphpca._exact_eigendecomp(l, 4, which='SM')
-        self.assertTrue(np.allclose(Eb, Es), 'Big vals: {}\nSmall vals: {}\n'.format(Eb, Es))
-        self.assertTrue(np.allclose(Ub, Us, rtol=1e-09, atol=1e-09),
-                        'Big vecs:\n{}\nSmall vecs:\n{}\n'.format(Ub, Us))
+        Eb, Ub = graphpca._sparse_eigendecomp(l, 4, which="SM")
+        Es, Us = graphpca._exact_eigendecomp(l, 4, which="SM")
+        self.assertTrue(
+            np.allclose(Eb, Es), "Big vals: {}\nSmall vals: {}\n".format(Eb, Es)
+        )
+        self.assertTrue(
+            np.allclose(Ub, Us, rtol=1e-09, atol=1e-09),
+            "Big vecs:\n{}\nSmall vecs:\n{}\n".format(Ub, Us),
+        )
         # Test for biggest eigs
-        Eb, Ub = graphpca._sparse_eigendecomp(l, 4, which='LM')
-        Es, Us = graphpca._exact_eigendecomp(l, 4, which='LM')
-        self.assertTrue(np.allclose(Eb, Es), 'Big vals: {}\nSmall vals: {}\n'.format(Eb, Es))
-        self.assertTrue(np.allclose(Ub, Us, rtol=1e-09, atol=1e-09),
-                        'Big vecs:\n{}\nSmall vecs:\n{}\n'.format(Ub, Us))
-
+        Eb, Ub = graphpca._sparse_eigendecomp(l, 4, which="LM")
+        Es, Us = graphpca._exact_eigendecomp(l, 4, which="LM")
+        self.assertTrue(
+            np.allclose(Eb, Es), "Big vals: {}\nSmall vals: {}\n".format(Eb, Es)
+        )
+        self.assertTrue(
+            np.allclose(Ub, Us, rtol=1e-09, atol=1e-09),
+            "Big vecs:\n{}\nSmall vecs:\n{}\n".format(Ub, Us),
+        )
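
A quick usage sketch of the API this patch reformats — not part of the diff;
it assumes graphpca and networkx are installed, and mirrors the shapes
exercised in test_graphpca.py:

    import networkx as nx
    import graphpca

    # Embed a random 100-node graph in two dimensions. Per the docstrings
    # above, reduce_graph_efficiently runs PCA on the graph's ETCD via an
    # eigendecomposition of the Laplacian.
    g = nx.erdos_renyi_graph(100, 0.3)
    g_2 = graphpca.reduce_graph_efficiently(g, 2)
    print(g_2.shape)  # (2, 100): one row per output dimension, one column per node

On the hook itself: hooks/pre-commit runs format.sh (black over graphpca/ and
test/), then `git update-index --again` re-stages the files black rewrote, so
the commit records the formatted versions.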