Commit d91b75e

Format all .py files using autopep8
sbaldu committed Nov 2, 2023
1 parent 218b6f3 commit d91b75e
Showing 14 changed files with 199 additions and 143 deletions.
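
The commit message does not record the exact command that was run; a typical whole-tree invocation would be `autopep8 --in-place --recursive .` (an assumption, not taken from the commit). The same fixer can also be driven from Python; a minimal sketch, assuming only that the autopep8 package is installed:

    # Minimal sketch: apply autopep8's default PEP 8 fixes to a source string.
    # fix_code() is autopep8's documented library entry point.
    import autopep8

    fixed = autopep8.fix_code("x=[1,2 ,3]\n")
    print(fixed)  # x = [1, 2, 3]

The diffs below are consistent with autopep8's standard pycodestyle-driven fixes: whitespace after commas (E231), blank-line conventions around definitions (E302/E305), at least two spaces before inline comments (E261), no spaces around keyword-argument '=' (E251), operator spacing, and block comments starting with a single '#' (E266).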
93 changes: 47 additions & 46 deletions CLUEstering/CLUEstering.py
@@ -15,7 +15,8 @@
from sklearn.preprocessing import StandardScaler
import CLUEsteringCPP as Algo

def test_blobs(n_samples: int, n_dim: int , n_blobs: int = 4, mean: float = 0,

def test_blobs(n_samples: int, n_dim: int, n_blobs: int = 4, mean: float = 0,
sigma: float = 0.5, x_max: float = 30, y_max: float = 30) -> pd.DataFrame:
"""
Returns a dataframe containing randomly generated 2-dimensional or 3-dimensional blobs.
@@ -65,15 +66,14 @@ def test_blobs(n_samples: int, n_dim: int , n_blobs: int = 4, mean: float = 0,
data['x1'] = blob_data.T[1]
data['weight'] = np.full(shape=len(blob_data.T[0]), fill_value=1)


return pd.DataFrame(data)
if n_dim == 3:
data = {'x0': [], 'x1': [], 'x2': [], 'weight': []}
sqrt_samples = int(sqrt(n_samples))
z_values = np.random.normal(mean,sigma,sqrt_samples)
z_values = np.random.normal(mean, sigma, sqrt_samples)
centers = [[x_max * rnd.random(), y_max * rnd.random()] for _ in range(n_blobs)]

for value in z_values: # for every z value, a layer is generated.
for value in z_values: # for every z value, a layer is generated.
blob_data = make_blobs(n_samples=sqrt_samples, centers=np.array(centers))[0]
data['x0'] = np.concatenate([data['x0'], blob_data.T[0]])
data['x1'] = np.concatenate([data['x1'], blob_data.T[1]])
@@ -106,12 +106,13 @@ class clustering_data:
Number of points in the clustering data.
"""

coords : np.ndarray
original_coords : np.ndarray
weight : np.ndarray
domain_ranges : list
n_dim : int
n_points : int
coords: np.ndarray
original_coords: np.ndarray
weight: np.ndarray
domain_ranges: list
n_dim: int
n_points: int


@dataclass(eq=False)
class cluster_properties:
@@ -135,12 +136,12 @@ class cluster_properties:
Dataframe containing is_seed and cluster_ids as columns.
"""

n_clusters : int
cluster_ids : np.ndarray
is_seed : np.ndarray
cluster_points : np.ndarray
points_per_cluster : np.ndarray
output_df : pd.DataFrame
n_clusters: int
cluster_ids: np.ndarray
is_seed: np.ndarray
cluster_points: np.ndarray
points_per_cluster: np.ndarray
output_df: pd.DataFrame

def __eq__(self, other):
if self.n_clusters != other.n_clusters:
@@ -195,18 +196,18 @@ def __init__(self, dc_: float, rhoc_: float, outlier_: float, ppbin: int = 10):
self.ppbin = ppbin

# Initialize attributes
## Data containers
# Data containers
self.clust_data = None
self.scaler = StandardScaler()

## Kernel for calculation of local density
# Kernel for calculation of local density
self.kernel = Algo.flatKernel(0.5)

## Output attributes
# Output attributes
self.clust_prop = None
self.elapsed_time = 0.

def _read_array(self, input_data: Union[list,np.ndarray]) -> None:
def _read_array(self, input_data: Union[list, np.ndarray]) -> None:
"""
Reads data provided with lists or np.ndarrays
@@ -235,7 +236,7 @@ def _read_array(self, input_data: Union[list,np.ndarray]) -> None:
len(input_data[:-1]),
len(input_data[-1]))

def _read_string(self, input_data: str) -> Union[pd.DataFrame,None]:
def _read_string(self, input_data: str) -> Union[pd.DataFrame, None]:
"""
Reads data provided by passing a string containing the path to a csv file
@@ -259,7 +260,7 @@ def _read_string(self, input_data: str) -> Union[pd.DataFrame,None]:
df_ = pd.read_csv(input_data)
return df_

def _read_dict_df(self, input_data: Union[dict,pd.DataFrame]) -> pd.DataFrame:
def _read_dict_df(self, input_data: Union[dict, pd.DataFrame]) -> pd.DataFrame:
"""
Reads data provided using dictionaries or pandas dataframes
@@ -314,7 +315,7 @@ def _handle_dataframe(self, df_: pd.DataFrame) -> None:
n_points = len(df_.index)
coords = np.zeros(shape=(n_dim, n_points))
for dim in range(n_dim):
coords[dim] = np.array(df_.iloc[:,dim])
coords[dim] = np.array(df_.iloc[:, dim])

self.clust_data = clustering_data(coords,
np.copy(coords),
@@ -339,10 +340,10 @@ def _rescale(self) -> None:

for dim in range(self.clust_data.n_dim):
self.clust_data.coords[dim] = \
self.scaler.fit_transform(self.clust_data.coords[dim].reshape(-1, 1)).reshape(1, -1)[0]
self.scaler.fit_transform(self.clust_data.coords[dim].reshape(-1, 1)).reshape(1, -1)[0]

def read_data(self,
input_data: Union[pd.DataFrame,str,dict,list,np.ndarray],
input_data: Union[pd.DataFrame, str, dict, list, np.ndarray],
rescale: bool = True,
**kwargs: tuple) -> None:
"""
Expand Down Expand Up @@ -435,7 +436,7 @@ def change_coordinates(self, **kwargs: types.FunctionType) -> None:
self.clust_data.coords[int(coord[1])] = \
self.scaler.fit_transform(
self.clust_data.coords[int(coord[1])].reshape(-1, 1)
).reshape(1, -1)[0]
).reshape(1, -1)[0]

def change_domains(self, **kwargs: tuple) -> None:
"""
Expand Down Expand Up @@ -471,7 +472,7 @@ def change_domains(self, **kwargs: tuple) -> None:

def choose_kernel(self,
choice: str,
parameters: Union[list,None] = None,
parameters: Union[list, None] = None,
function: types.FunctionType = lambda: 0) -> None:
"""
Changes the kernel used in the calculation of local density. The default kernel
Expand Down Expand Up @@ -552,9 +553,9 @@ def run_clue(self, verbose: bool = False) -> None:
"""

start = time.time_ns()
cluster_id_is_seed = Algo.mainRun(self.dc_,self.rhoc,self.outlier,self.ppbin,
self.clust_data.domain_ranges,self.kernel,
self.clust_data.coords,self.clust_data.weight,
cluster_id_is_seed = Algo.mainRun(self.dc_, self.rhoc, self.outlier, self.ppbin,
self.clust_data.domain_ranges, self.kernel,
self.clust_data.coords, self.clust_data.weight,
self.clust_data.n_dim)
finish = time.time_ns()
cluster_ids = np.array(cluster_id_is_seed[0])
@@ -577,12 +578,12 @@ def run_clue(self, verbose: bool = False) -> None:
points_per_cluster,
output_df)

self.elapsed_time = (finish - start)/(10**6)
self.elapsed_time = (finish - start) / (10**6)
if verbose:
print(f'CLUE run in {self.elapsed_time} ms')
print(f'Number of clusters found: {self.clust_prop.n_clusters}')

def input_plotter(self, plot_title: str='', title_size: float = 16,
def input_plotter(self, plot_title: str = '', title_size: float = 16,
x_label: str = 'x', y_label: str = 'y', z_label: str = 'z',
label_size: float = 16, pt_size: float = 1, pt_colour: str = 'b',
grid: bool = True, grid_style: str = '--', grid_size: float = 0.2,
@@ -667,10 +668,10 @@ def input_plotter(self, plot_title: str='', title_size: float = 16,
fig = plt.figure()
ax_ = fig.add_subplot(projection='3d')
ax_.scatter(cartesian_coords[0],
cartesian_coords[1],
cartesian_coords[2],
s=pt_size,
color=pt_colour)
cartesian_coords[1],
cartesian_coords[2],
s=pt_size,
color=pt_colour)

# Customization of the plot title
ax_.set_title(plot_title, fontsize=title_size)
@@ -765,12 +766,12 @@ def cluster_plotter(self, plot_title: str = '', title_size: float = 16,

max_clusterid = max(df_["cluster_ids"])

df_out = df_[df_.cluster_ids == -1] # Outliers
df_out = df_[df_.cluster_ids == -1] # Outliers
plt.scatter(df_out.x0, df_out.x1, s=outl_size, marker='x', color='0.4')
for i in range(0, max_clusterid+1):
dfi = df_[df_.cluster_ids == i] # ith cluster
for i in range(0, max_clusterid + 1):
dfi = df_[df_.cluster_ids == i] # ith cluster
plt.scatter(dfi.x0, dfi.x1, s=pt_size, marker='.')
df_seed = df_[df_.isSeed == 1] # Only Seeds
df_seed = df_[df_.isSeed == 1] # Only Seeds
plt.scatter(df_seed.x0, df_seed.x1, s=seed_size, color='r', marker='*')

# Customization of the plot title
@@ -804,13 +805,13 @@ def cluster_plotter(self, plot_title: str = '', title_size: float = 16,
ax_ = fig.add_subplot(projection='3d')

df_out = df_[df_.cluster_ids == -1]
ax_.scatter(df_out.x0, df_out.x1, df_out.x2, s=outl_size, color = 'grey', marker = 'x')
for i in range(0, max_clusterid+1):
ax_.scatter(df_out.x0, df_out.x1, df_out.x2, s=outl_size, color='grey', marker='x')
for i in range(0, max_clusterid + 1):
dfi = df_[df_.cluster_ids == i]
ax_.scatter(dfi.x0, dfi.x1, dfi.x2, s=pt_size, marker = '.')
ax_.scatter(dfi.x0, dfi.x1, dfi.x2, s=pt_size, marker='.')

df_seed = df_[df_.isSeed == 1] # Only Seeds
ax_.scatter(df_seed.x0, df_seed.x1, df_seed.x2, s=seed_size, color = 'r', marker = '*')
df_seed = df_[df_.isSeed == 1] # Only Seeds
ax_.scatter(df_seed.x0, df_seed.x1, df_seed.x2, s=seed_size, color='r', marker='*')

# Customization of the plot title
ax_.set_title(plot_title, fontsize=title_size)
@@ -862,4 +863,4 @@ def to_csv(self, output_folder: str, file_name: str) -> None:
data['is_seed'] = self.clust_prop.is_seed

df_ = pd.DataFrame(data)
df_.to_csv(out_path,index=False)
df_.to_csv(out_path, index=False)
2 changes: 1 addition & 1 deletion CLUEstering/__init__.py
@@ -1,2 +1,2 @@
from CLUEstering.CLUEstering import clusterer
from CLUEstering.CLUEstering import test_blobs
from CLUEstering.CLUEstering import test_blobs
40 changes: 20 additions & 20 deletions setup.py
@@ -4,14 +4,14 @@

__version__ = "1.4.0"
this_directory = Path(__file__).parent
long_description = (this_directory/'README.md').read_text()
long_description = (this_directory / 'README.md').read_text()

ext_modules = [
Pybind11Extension(
"CLUEsteringCPP",
['CLUEstering/binding.cc'],
include_dirs = ['CLUEstering/include/']
),
Pybind11Extension(
"CLUEsteringCPP",
['CLUEstering/binding.cc'],
include_dirs=['CLUEstering/include/']
),
]

setup(
@@ -21,18 +21,18 @@
author_email="[email protected]",
description='''A library that generalizes the original 2-dimensional CLUE
algorithm made at CERN.''',
long_description=long_description,
long_description_content_type='text/markdown',
packages=find_packages(),
install_requires=['scikit-learn','numpy','matplotlib','pandas'],
ext_modules=ext_modules,
keywords=['Python','Clustering','Binding'],
python_requires='>=3.7',
classifiers=[
'Intended Audience :: Developers',
'Programming Language :: Python :: 3',
'Operating System :: Unix',
'Operating System :: MacOS :: MacOS X',
'Operating System :: Microsoft :: Windows',
]
long_description=long_description,
long_description_content_type='text/markdown',
packages=find_packages(),
install_requires=['scikit-learn', 'numpy', 'matplotlib', 'pandas'],
ext_modules=ext_modules,
keywords=['Python', 'Clustering', 'Binding'],
python_requires='>=3.7',
classifiers=[
'Intended Audience :: Developers',
'Programming Language :: Python :: 3',
'Operating System :: Unix',
'Operating System :: MacOS :: MacOS X',
'Operating System :: Microsoft :: Windows',
]
)
13 changes: 8 additions & 5 deletions tests/test_blob_dataset.py
@@ -1,24 +1,27 @@
from filecmp import cmp
import CLUEstering as clue
import numpy as np
import os
import pandas as pd
import pytest
import sys
sys.path.insert(1, '../CLUEstering/')
import CLUEstering as clue
from filecmp import cmp


@pytest.fixture
def blobs():
return pd.read_csv("./test_datasets/blob.csv")


def test_blobs_clustering(blobs):
# Check if the output file already exists and if it does, delete it
if os.path.isfile('./blobs_output.csv'):
os.remove('./blobs_output.csv')

c = clue.clusterer(0.8,5,1.5)
c = clue.clusterer(0.8, 5, 1.5)
c.read_data(blobs)
c.run_clue()
c.to_csv('./','blobs_output.csv')
c.to_csv('./', 'blobs_output.csv')

assert cmp('./blobs_output.csv', './test_datasets/truth_files/blobs_truth.csv')
assert cmp('./blobs_output.csv',
'./test_datasets/truth_files/blobs_truth.csv')
24 changes: 16 additions & 8 deletions tests/test_change_domains.py
@@ -1,15 +1,17 @@
from math import pi
import CLUEstering as clue
import numpy as np
import pytest
import sys
sys.path.insert(1, '../CLUEstering/')
import CLUEstering as clue
from math import pi


@pytest.fixture
def blob():
csv_file = './test_datasets/blob.csv'
return csv_file


def test_default_domains(blob):
clust = clue.clusterer(0.5, 5., 1.2)
clust.read_data(blob)
@@ -21,6 +23,7 @@ def test_default_domains(blob):
assert clust.clust_data.domain_ranges[1].min == -3.4028234663852886e+38
assert clust.clust_data.domain_ranges[1].max == 3.4028234663852886e+38


def test_change_domains_1():
# We generate data with zero mean and standard deviation, so that the
# domain extremes are not normalized by the standard scaler
@@ -43,10 +46,13 @@ def test_change_domains_1():
clust.change_domains(x0=(0., 2.), x1=(-pi, pi))

# Check that the new domains are (0, 2) and (-pi, pi)
assert clust.clust_data.domain_ranges[0].min == 0.
assert clust.clust_data.domain_ranges[0].min == 0.
assert clust.clust_data.domain_ranges[0].max == 2.
assert clust.clust_data.domain_ranges[1].min == pytest.approx(-pi, 0.0000001)
assert clust.clust_data.domain_ranges[1].max == pytest.approx(pi, 0.0000001)
assert clust.clust_data.domain_ranges[1].min == pytest.approx(
-pi, 0.0000001)
assert clust.clust_data.domain_ranges[1].max == pytest.approx(
pi, 0.0000001)


def test_change_domains_2():
# We generate data with non-zero mean and standard deviation, and we check
@@ -70,7 +76,9 @@ def test_change_domains_2():
clust.change_domains(x0=(0., 2.), x1=(-pi, pi))

# Check that the new domains are (0, 2) and (-pi, pi)
assert clust.clust_data.domain_ranges[0].min == pytest.approx(-1.41, 0.01)
assert clust.clust_data.domain_ranges[0].min == pytest.approx(-1.41, 0.01)
assert clust.clust_data.domain_ranges[0].max == 0.
assert clust.clust_data.domain_ranges[1].min == pytest.approx(-3.6356550, 0.0000001)
assert clust.clust_data.domain_ranges[1].max == pytest.approx(0.8072279, 0.0000001)
assert clust.clust_data.domain_ranges[1].min == pytest.approx(
-3.6356550, 0.0000001)
assert clust.clust_data.domain_ranges[1].max == pytest.approx(
0.8072279, 0.0000001)
(9 more changed files not shown)