From d91b75e7c9e21891342eda2e5500949d52abf856 Mon Sep 17 00:00:00 2001 From: sbaldu Date: Thu, 2 Nov 2023 16:25:12 +0100 Subject: [PATCH] Format all `.py` files using autopep8 --- CLUEstering/CLUEstering.py | 93 ++++++++++++++++--------------- CLUEstering/__init__.py | 2 +- setup.py | 40 ++++++------- tests/test_blob_dataset.py | 13 +++-- tests/test_change_domains.py | 24 +++++--- tests/test_circles_dataset.py | 15 +++-- tests/test_clusterer_equality.py | 8 ++- tests/test_domain_extremes.py | 6 +- tests/test_input_datatypes.py | 77 +++++++++++++++---------- tests/test_kernels.py | 7 ++- tests/test_moons_dataset.py | 13 +++-- tests/test_sissa_dataset.py | 13 +++-- tests/test_test_blobs.py | 18 ++++-- tests/test_toydetector_dataset.py | 13 +++-- 14 files changed, 199 insertions(+), 143 deletions(-) diff --git a/CLUEstering/CLUEstering.py b/CLUEstering/CLUEstering.py index 55982bfb..5cfc018d 100644 --- a/CLUEstering/CLUEstering.py +++ b/CLUEstering/CLUEstering.py @@ -15,7 +15,8 @@ from sklearn.preprocessing import StandardScaler import CLUEsteringCPP as Algo -def test_blobs(n_samples: int, n_dim: int , n_blobs: int = 4, mean: float = 0, + +def test_blobs(n_samples: int, n_dim: int, n_blobs: int = 4, mean: float = 0, sigma: float = 0.5, x_max: float = 30, y_max: float = 30) -> pd.DataFrame: """ Returns a dataframe containing randomly generated 2-dimensional or 3-dimensional blobs. @@ -65,15 +66,14 @@ def test_blobs(n_samples: int, n_dim: int , n_blobs: int = 4, mean: float = 0, data['x1'] = blob_data.T[1] data['weight'] = np.full(shape=len(blob_data.T[0]), fill_value=1) - return pd.DataFrame(data) if n_dim == 3: data = {'x0': [], 'x1': [], 'x2': [], 'weight': []} sqrt_samples = int(sqrt(n_samples)) - z_values = np.random.normal(mean,sigma,sqrt_samples) + z_values = np.random.normal(mean, sigma, sqrt_samples) centers = [[x_max * rnd.random(), y_max * rnd.random()] for _ in range(n_blobs)] - for value in z_values: # for every z value, a layer is generated. + for value in z_values: # for every z value, a layer is generated. blob_data = make_blobs(n_samples=sqrt_samples, centers=np.array(centers))[0] data['x0'] = np.concatenate([data['x0'], blob_data.T[0]]) data['x1'] = np.concatenate([data['x1'], blob_data.T[1]]) @@ -106,12 +106,13 @@ class clustering_data: Number of points in the clustering data. """ - coords : np.ndarray - original_coords : np.ndarray - weight : np.ndarray - domain_ranges : list - n_dim : int - n_points : int + coords: np.ndarray + original_coords: np.ndarray + weight: np.ndarray + domain_ranges: list + n_dim: int + n_points: int + @dataclass(eq=False) class cluster_properties: @@ -135,12 +136,12 @@ class cluster_properties: Dataframe containing is_seed and cluster_ids as columns. """ - n_clusters : int - cluster_ids : np.ndarray - is_seed : np.ndarray - cluster_points : np.ndarray - points_per_cluster : np.ndarray - output_df : pd.DataFrame + n_clusters: int + cluster_ids: np.ndarray + is_seed: np.ndarray + cluster_points: np.ndarray + points_per_cluster: np.ndarray + output_df: pd.DataFrame def __eq__(self, other): if self.n_clusters != other.n_clusters: @@ -195,18 +196,18 @@ def __init__(self, dc_: float, rhoc_: float, outlier_: float, ppbin: int = 10): self.ppbin = ppbin # Initialize attributes - ## Data containers + # Data containers self.clust_data = None self.scaler = StandardScaler() - ## Kernel for calculation of local density + # Kernel for calculation of local density self.kernel = Algo.flatKernel(0.5) - ## Output attributes + # Output attributes self.clust_prop = None self.elapsed_time = 0. - def _read_array(self, input_data: Union[list,np.ndarray]) -> None: + def _read_array(self, input_data: Union[list, np.ndarray]) -> None: """ Reads data provided with lists or np.ndarrays @@ -235,7 +236,7 @@ def _read_array(self, input_data: Union[list,np.ndarray]) -> None: len(input_data[:-1]), len(input_data[-1])) - def _read_string(self, input_data: str) -> Union[pd.DataFrame,None]: + def _read_string(self, input_data: str) -> Union[pd.DataFrame, None]: """ Reads data provided by passing a string containing the path to a csv file @@ -259,7 +260,7 @@ def _read_string(self, input_data: str) -> Union[pd.DataFrame,None]: df_ = pd.read_csv(input_data) return df_ - def _read_dict_df(self, input_data: Union[dict,pd.DataFrame]) -> pd.DataFrame: + def _read_dict_df(self, input_data: Union[dict, pd.DataFrame]) -> pd.DataFrame: """ Reads data provided using dictionaries or pandas dataframes @@ -314,7 +315,7 @@ def _handle_dataframe(self, df_: pd.DataFrame) -> None: n_points = len(df_.index) coords = np.zeros(shape=(n_dim, n_points)) for dim in range(n_dim): - coords[dim] = np.array(df_.iloc[:,dim]) + coords[dim] = np.array(df_.iloc[:, dim]) self.clust_data = clustering_data(coords, np.copy(coords), @@ -339,10 +340,10 @@ def _rescale(self) -> None: for dim in range(self.clust_data.n_dim): self.clust_data.coords[dim] = \ - self.scaler.fit_transform(self.clust_data.coords[dim].reshape(-1, 1)).reshape(1, -1)[0] + self.scaler.fit_transform(self.clust_data.coords[dim].reshape(-1, 1)).reshape(1, -1)[0] def read_data(self, - input_data: Union[pd.DataFrame,str,dict,list,np.ndarray], + input_data: Union[pd.DataFrame, str, dict, list, np.ndarray], rescale: bool = True, **kwargs: tuple) -> None: """ @@ -435,7 +436,7 @@ def change_coordinates(self, **kwargs: types.FunctionType) -> None: self.clust_data.coords[int(coord[1])] = \ self.scaler.fit_transform( self.clust_data.coords[int(coord[1])].reshape(-1, 1) - ).reshape(1, -1)[0] + ).reshape(1, -1)[0] def change_domains(self, **kwargs: tuple) -> None: """ @@ -471,7 +472,7 @@ def change_domains(self, **kwargs: tuple) -> None: def choose_kernel(self, choice: str, - parameters: Union[list,None] = None, + parameters: Union[list, None] = None, function: types.FunctionType = lambda: 0) -> None: """ Changes the kernel used in the calculation of local density. The default kernel @@ -552,9 +553,9 @@ def run_clue(self, verbose: bool = False) -> None: """ start = time.time_ns() - cluster_id_is_seed = Algo.mainRun(self.dc_,self.rhoc,self.outlier,self.ppbin, - self.clust_data.domain_ranges,self.kernel, - self.clust_data.coords,self.clust_data.weight, + cluster_id_is_seed = Algo.mainRun(self.dc_, self.rhoc, self.outlier, self.ppbin, + self.clust_data.domain_ranges, self.kernel, + self.clust_data.coords, self.clust_data.weight, self.clust_data.n_dim) finish = time.time_ns() cluster_ids = np.array(cluster_id_is_seed[0]) @@ -577,12 +578,12 @@ def run_clue(self, verbose: bool = False) -> None: points_per_cluster, output_df) - self.elapsed_time = (finish - start)/(10**6) + self.elapsed_time = (finish - start) / (10**6) if verbose: print(f'CLUE run in {self.elapsed_time} ms') print(f'Number of clusters found: {self.clust_prop.n_clusters}') - def input_plotter(self, plot_title: str='', title_size: float = 16, + def input_plotter(self, plot_title: str = '', title_size: float = 16, x_label: str = 'x', y_label: str = 'y', z_label: str = 'z', label_size: float = 16, pt_size: float = 1, pt_colour: str = 'b', grid: bool = True, grid_style: str = '--', grid_size: float = 0.2, @@ -667,10 +668,10 @@ def input_plotter(self, plot_title: str='', title_size: float = 16, fig = plt.figure() ax_ = fig.add_subplot(projection='3d') ax_.scatter(cartesian_coords[0], - cartesian_coords[1], - cartesian_coords[2], - s=pt_size, - color=pt_colour) + cartesian_coords[1], + cartesian_coords[2], + s=pt_size, + color=pt_colour) # Customization of the plot title ax_.set_title(plot_title, fontsize=title_size) @@ -765,12 +766,12 @@ def cluster_plotter(self, plot_title: str = '', title_size: float = 16, max_clusterid = max(df_["cluster_ids"]) - df_out = df_[df_.cluster_ids == -1] # Outliers + df_out = df_[df_.cluster_ids == -1] # Outliers plt.scatter(df_out.x0, df_out.x1, s=outl_size, marker='x', color='0.4') - for i in range(0, max_clusterid+1): - dfi = df_[df_.cluster_ids == i] # ith cluster + for i in range(0, max_clusterid + 1): + dfi = df_[df_.cluster_ids == i] # ith cluster plt.scatter(dfi.x0, dfi.x1, s=pt_size, marker='.') - df_seed = df_[df_.isSeed == 1] # Only Seeds + df_seed = df_[df_.isSeed == 1] # Only Seeds plt.scatter(df_seed.x0, df_seed.x1, s=seed_size, color='r', marker='*') # Customization of the plot title @@ -804,13 +805,13 @@ def cluster_plotter(self, plot_title: str = '', title_size: float = 16, ax_ = fig.add_subplot(projection='3d') df_out = df_[df_.cluster_ids == -1] - ax_.scatter(df_out.x0, df_out.x1, df_out.x2, s=outl_size, color = 'grey', marker = 'x') - for i in range(0, max_clusterid+1): + ax_.scatter(df_out.x0, df_out.x1, df_out.x2, s=outl_size, color='grey', marker='x') + for i in range(0, max_clusterid + 1): dfi = df_[df_.cluster_ids == i] - ax_.scatter(dfi.x0, dfi.x1, dfi.x2, s=pt_size, marker = '.') + ax_.scatter(dfi.x0, dfi.x1, dfi.x2, s=pt_size, marker='.') - df_seed = df_[df_.isSeed == 1] # Only Seeds - ax_.scatter(df_seed.x0, df_seed.x1, df_seed.x2, s=seed_size, color = 'r', marker = '*') + df_seed = df_[df_.isSeed == 1] # Only Seeds + ax_.scatter(df_seed.x0, df_seed.x1, df_seed.x2, s=seed_size, color='r', marker='*') # Customization of the plot title ax_.set_title(plot_title, fontsize=title_size) @@ -862,4 +863,4 @@ def to_csv(self, output_folder: str, file_name: str) -> None: data['is_seed'] = self.clust_prop.is_seed df_ = pd.DataFrame(data) - df_.to_csv(out_path,index=False) + df_.to_csv(out_path, index=False) diff --git a/CLUEstering/__init__.py b/CLUEstering/__init__.py index 2216cf6d..eb8e190c 100644 --- a/CLUEstering/__init__.py +++ b/CLUEstering/__init__.py @@ -1,2 +1,2 @@ from CLUEstering.CLUEstering import clusterer -from CLUEstering.CLUEstering import test_blobs +from CLUEstering.CLUEstering import test_blobs diff --git a/setup.py b/setup.py index b704d223..1f38963b 100644 --- a/setup.py +++ b/setup.py @@ -4,14 +4,14 @@ __version__ = "1.4.0" this_directory = Path(__file__).parent -long_description = (this_directory/'README.md').read_text() +long_description = (this_directory / 'README.md').read_text() ext_modules = [ - Pybind11Extension( - "CLUEsteringCPP", - ['CLUEstering/binding.cc'], - include_dirs = ['CLUEstering/include/'] - ), + Pybind11Extension( + "CLUEsteringCPP", + ['CLUEstering/binding.cc'], + include_dirs=['CLUEstering/include/'] + ), ] setup( @@ -21,18 +21,18 @@ author_email="simone.balducci00@gmail.com", description='''A library that generalizes the original 2-dimensional CLUE algorithm made at CERN.''', - long_description=long_description, - long_description_content_type='text/markdown', - packages=find_packages(), - install_requires=['scikit-learn','numpy','matplotlib','pandas'], - ext_modules=ext_modules, - keywords=['Python','Clustering','Binding'], - python_requires='>=3.7', - classifiers=[ - 'Intended Audience :: Developers', - 'Programming Language :: Python :: 3', - 'Operating System :: Unix', - 'Operating System :: MacOS :: MacOS X', - 'Operating System :: Microsoft :: Windows', - ] + long_description=long_description, + long_description_content_type='text/markdown', + packages=find_packages(), + install_requires=['scikit-learn', 'numpy', 'matplotlib', 'pandas'], + ext_modules=ext_modules, + keywords=['Python', 'Clustering', 'Binding'], + python_requires='>=3.7', + classifiers=[ + 'Intended Audience :: Developers', + 'Programming Language :: Python :: 3', + 'Operating System :: Unix', + 'Operating System :: MacOS :: MacOS X', + 'Operating System :: Microsoft :: Windows', + ] ) diff --git a/tests/test_blob_dataset.py b/tests/test_blob_dataset.py index c37fe8d2..e9a777db 100644 --- a/tests/test_blob_dataset.py +++ b/tests/test_blob_dataset.py @@ -1,24 +1,27 @@ +from filecmp import cmp +import CLUEstering as clue import numpy as np import os import pandas as pd import pytest import sys sys.path.insert(1, '../CLUEstering/') -import CLUEstering as clue -from filecmp import cmp + @pytest.fixture def blobs(): return pd.read_csv("./test_datasets/blob.csv") + def test_blobs_clustering(blobs): # Check if the output file already exists and if it does, delete it if os.path.isfile('./blobs_output.csv'): os.remove('./blobs_output.csv') - c = clue.clusterer(0.8,5,1.5) + c = clue.clusterer(0.8, 5, 1.5) c.read_data(blobs) c.run_clue() - c.to_csv('./','blobs_output.csv') + c.to_csv('./', 'blobs_output.csv') - assert cmp('./blobs_output.csv', './test_datasets/truth_files/blobs_truth.csv') + assert cmp('./blobs_output.csv', + './test_datasets/truth_files/blobs_truth.csv') diff --git a/tests/test_change_domains.py b/tests/test_change_domains.py index 9ff4e8e0..266da16c 100644 --- a/tests/test_change_domains.py +++ b/tests/test_change_domains.py @@ -1,15 +1,17 @@ +from math import pi +import CLUEstering as clue import numpy as np import pytest import sys sys.path.insert(1, '../CLUEstering/') -import CLUEstering as clue -from math import pi + @pytest.fixture def blob(): csv_file = './test_datasets/blob.csv' return csv_file + def test_default_domains(blob): clust = clue.clusterer(0.5, 5., 1.2) clust.read_data(blob) @@ -21,6 +23,7 @@ def test_default_domains(blob): assert clust.clust_data.domain_ranges[1].min == -3.4028234663852886e+38 assert clust.clust_data.domain_ranges[1].max == 3.4028234663852886e+38 + def test_change_domains_1(): # We generate data with zero mean and standard deviation, so that the # domain extremes are not normalized by the standard scaler @@ -43,10 +46,13 @@ def test_change_domains_1(): clust.change_domains(x0=(0., 2.), x1=(-pi, pi)) # Check that the new domains are (0, 2) and (-pi, pi) - assert clust.clust_data.domain_ranges[0].min == 0. + assert clust.clust_data.domain_ranges[0].min == 0. assert clust.clust_data.domain_ranges[0].max == 2. - assert clust.clust_data.domain_ranges[1].min == pytest.approx(-pi, 0.0000001) - assert clust.clust_data.domain_ranges[1].max == pytest.approx(pi, 0.0000001) + assert clust.clust_data.domain_ranges[1].min == pytest.approx( + -pi, 0.0000001) + assert clust.clust_data.domain_ranges[1].max == pytest.approx( + pi, 0.0000001) + def test_change_domains_2(): # We generate data with non-zero mean and standard deviation, and we check @@ -70,7 +76,9 @@ def test_change_domains_2(): clust.change_domains(x0=(0., 2.), x1=(-pi, pi)) # Check that the new domains are (0, 2) and (-pi, pi) - assert clust.clust_data.domain_ranges[0].min == pytest.approx(-1.41, 0.01) + assert clust.clust_data.domain_ranges[0].min == pytest.approx(-1.41, 0.01) assert clust.clust_data.domain_ranges[0].max == 0. - assert clust.clust_data.domain_ranges[1].min == pytest.approx(-3.6356550, 0.0000001) - assert clust.clust_data.domain_ranges[1].max == pytest.approx(0.8072279, 0.0000001) + assert clust.clust_data.domain_ranges[1].min == pytest.approx( + -3.6356550, 0.0000001) + assert clust.clust_data.domain_ranges[1].max == pytest.approx( + 0.8072279, 0.0000001) diff --git a/tests/test_circles_dataset.py b/tests/test_circles_dataset.py index adbc6abf..08d244e2 100644 --- a/tests/test_circles_dataset.py +++ b/tests/test_circles_dataset.py @@ -1,26 +1,29 @@ +from filecmp import cmp +import CLUEstering as clue import numpy as np import os import pandas as pd import pytest import sys sys.path.insert(1, '../CLUEstering/') -import CLUEstering as clue -from filecmp import cmp + @pytest.fixture def circles(): return pd.read_csv("./test_datasets/circles.csv") + def test_circles_clustering(circles): # Check if the output file already exists and if it does, delete it if os.path.isfile('./circles_output.csv'): os.remove('./circles_output.csv') - c = clue.clusterer(0.9,5,1.5) + c = clue.clusterer(0.9, 5, 1.5) c.read_data(circles) c.change_coordinates(x0=lambda x: np.sqrt(x[0]**2 + x[1]**2), - x1=lambda x: np.arctan2(x[1],x[0])) + x1=lambda x: np.arctan2(x[1], x[0])) c.run_clue() - c.to_csv('./','circles_output.csv') + c.to_csv('./', 'circles_output.csv') - assert cmp('./circles_output.csv', './test_datasets/truth_files/circles_1000_truth.csv') + assert cmp('./circles_output.csv', + './test_datasets/truth_files/circles_1000_truth.csv') diff --git a/tests/test_clusterer_equality.py b/tests/test_clusterer_equality.py index a0fea4c1..a41683e4 100644 --- a/tests/test_clusterer_equality.py +++ b/tests/test_clusterer_equality.py @@ -1,13 +1,15 @@ +import CLUEstering as clue import pandas as pd import pytest import sys sys.path.insert(1, '../CLUEstering/') -import CLUEstering as clue + @pytest.fixture def moons(): return pd.read_csv("./test_datasets/moons.csv") + @pytest.fixture def circles(): return pd.read_csv("./test_datasets/circles.csv") @@ -25,12 +27,12 @@ def test_clusterer_equality(moons, circles): clust1_copy.run_clue() # Circles dataset - clust2 = clue.clusterer(0.9,5,1.5) + clust2 = clue.clusterer(0.9, 5, 1.5) clust2.read_data(circles) clust2.run_clue() # Create a copy of the circles clusterer to check the equality of clusterers - clust2_copy = clue.clusterer(0.9,5,1.5) + clust2_copy = clue.clusterer(0.9, 5, 1.5) clust2_copy.read_data(circles) clust2_copy.run_clue() diff --git a/tests/test_domain_extremes.py b/tests/test_domain_extremes.py index e74446e6..6599615c 100644 --- a/tests/test_domain_extremes.py +++ b/tests/test_domain_extremes.py @@ -1,14 +1,16 @@ +from math import pi +import CLUEstering as clue import pandas as pd import pytest import sys sys.path.insert(1, '../CLUEstering/') -import CLUEstering as clue -from math import pi + @pytest.fixture def opposite_angles(): return pd.read_csv("./test_datasets/opposite_angles.csv") + def test_opposite_angles(opposite_angles): # Test points with angles distributed at opposite extremes of the domain # This test assures that the code works for data with periodic coordinates diff --git a/tests/test_input_datatypes.py b/tests/test_input_datatypes.py index 839fe938..8d90ef5f 100644 --- a/tests/test_input_datatypes.py +++ b/tests/test_input_datatypes.py @@ -1,62 +1,68 @@ +import CLUEstering as clue import numpy as np import pandas as pd import pytest import sys sys.path.insert(1, '../CLUEstering/') -import CLUEstering as clue + def test_read_array_except(): arr = np.array([[1, 4, 5]]) - clust = clue.clusterer(0.4, 5., 1.2) + clust = clue.clusterer(0.4, 5., 1.2) with pytest.raises(ValueError): clust.read_data(arr) + def test_read_string_except(): - clust = clue.clusterer(0.4, 5., 1.2) + clust = clue.clusterer(0.4, 5., 1.2) with pytest.raises(ValueError): clust.read_data('./test_datasets/blob.dat') + @pytest.fixture def no_weight_dataset(): - x0 = np.array([0,1,2,3,4]) - x1 = np.array([5,6,7,8,9]) - x2 = np.array([10,11,12,13,14]) + x0 = np.array([0, 1, 2, 3, 4]) + x1 = np.array([5, 6, 7, 8, 9]) + x2 = np.array([10, 11, 12, 13, 14]) data = {'x0': x0, 'x1': x1, 'x2': x2} return pd.DataFrame(data) + @pytest.fixture def low_dimensionality_dataset(): - weight = np.array([1,1,1,1,1]) + weight = np.array([1, 1, 1, 1, 1]) data = {'weight': weight} return pd.DataFrame(data) + @pytest.fixture def high_dimensionality_dataset(): - x0 = np.array([0,1,2,3,4]) - x1 = np.array([0,1,2,3,4]) - x2 = np.array([0,1,2,3,4]) - x3 = np.array([0,1,2,3,4]) - x4 = np.array([0,1,2,3,4]) - x5 = np.array([0,1,2,3,4]) - x6 = np.array([0,1,2,3,4]) - x7 = np.array([0,1,2,3,4]) - x8 = np.array([0,1,2,3,4]) - x9 = np.array([0,1,2,3,4]) - x10 = np.array([0,1,2,3,4]) - weight = np.array([1,1,1,1,1]) + x0 = np.array([0, 1, 2, 3, 4]) + x1 = np.array([0, 1, 2, 3, 4]) + x2 = np.array([0, 1, 2, 3, 4]) + x3 = np.array([0, 1, 2, 3, 4]) + x4 = np.array([0, 1, 2, 3, 4]) + x5 = np.array([0, 1, 2, 3, 4]) + x6 = np.array([0, 1, 2, 3, 4]) + x7 = np.array([0, 1, 2, 3, 4]) + x8 = np.array([0, 1, 2, 3, 4]) + x9 = np.array([0, 1, 2, 3, 4]) + x10 = np.array([0, 1, 2, 3, 4]) + weight = np.array([1, 1, 1, 1, 1]) data = {'x0': x0, 'x1': x1, 'x2': x2, 'x3': x3, 'x4': x4, 'x5': x5, 'x6': x6, 'x7': x7, 'x8': x8, 'x9': x9, 'x10': x10, 'weight': weight} return pd.DataFrame(data) + def test_handle_dataframe_except(no_weight_dataset, low_dimensionality_dataset, high_dimensionality_dataset): - clust = clue.clusterer(0.5,5.,1.) + clust = clue.clusterer(0.5, 5., 1.) with pytest.raises(ValueError): clust._handle_dataframe(no_weight_dataset) @@ -71,12 +77,14 @@ def file(): csv_file = './test_datasets/blob.csv' return csv_file + @pytest.fixture def dataframe(): csv_file = './test_datasets/blob.csv' df_ = pd.read_csv(csv_file) return df_ + @pytest.fixture def dictionary(dataframe): data_dict = {'x0': dataframe['x0'].values.tolist(), @@ -85,6 +93,7 @@ def dictionary(dataframe): 'weight': dataframe['weight'].values.tolist()} return data_dict + @pytest.fixture def lists(dataframe): data_lists = [dataframe['x0'].values.tolist(), @@ -93,6 +102,7 @@ def lists(dataframe): dataframe['weight'].values.tolist()] return data_lists + @pytest.fixture def arrays(dataframe): data_arrays = np.array([np.array(dataframe['x0'].values.tolist()), @@ -102,74 +112,81 @@ def arrays(dataframe): return data_arrays # Test the different data types singularly, so to make it easier to debug in case of error + + def test_csv(file): """ Test that CLUE works when the data is written in a csv file. """ - clust = clue.clusterer(1,5,1.6) + clust = clue.clusterer(1, 5, 1.6) clust.read_data(file) clust.run_clue() + def test_dict(dictionary): """ Test that CLUE works when the data is contained in a dictionary. """ - clust = clue.clusterer(1,5,1.6) + clust = clue.clusterer(1, 5, 1.6) clust.read_data(dictionary) clust.run_clue() + def test_pddf(dataframe): """ Test that CLUE works when the data is contained in a pandas dataframe. """ - clust = clue.clusterer(1,5,1.6) + clust = clue.clusterer(1, 5, 1.6) clust.read_data(dataframe) clust.run_clue() + def test_list(lists): """ Test that CLUE works when the data is contained in lists. """ - clust = clue.clusterer(1,5,1.6) + clust = clue.clusterer(1, 5, 1.6) clust.read_data(lists) clust.run_clue() + def test_ndarray(arrays): """ Test that CLUE works when the data is contained in numpy ndarrays. """ - clust = clue.clusterer(1,5,1.6) + clust = clue.clusterer(1, 5, 1.6) clust.read_data(arrays) clust.run_clue() + def test_same_result(file, dictionary, dataframe, lists, arrays): """ Run CLUE for all the supported data types and assert that the output is the same for all of them. """ - clust_file = clue.clusterer(1,5,1.6) + clust_file = clue.clusterer(1, 5, 1.6) clust_file.read_data(file) clust_file.run_clue() - clust_dict = clue.clusterer(1,5,1.6) + clust_dict = clue.clusterer(1, 5, 1.6) clust_dict.read_data(dictionary) clust_dict.run_clue() - clust_df = clue.clusterer(1,5,1.6) + clust_df = clue.clusterer(1, 5, 1.6) clust_df.read_data(dataframe) clust_df.run_clue() - clust_list = clue.clusterer(1,5,1.6) + clust_list = clue.clusterer(1, 5, 1.6) clust_list.read_data(lists) clust_list.run_clue() - clust_arr = clue.clusterer(1,5,1.6) + clust_arr = clue.clusterer(1, 5, 1.6) clust_arr.read_data(arrays) clust_arr.run_clue() diff --git a/tests/test_kernels.py b/tests/test_kernels.py index 6cfb86a7..8957ad8b 100644 --- a/tests/test_kernels.py +++ b/tests/test_kernels.py @@ -1,7 +1,8 @@ +import CLUEstering as clue import pytest import sys sys.path.insert(1, '../CLUEstering/') -import CLUEstering as clue + def test_flat_kernel_except(): clust = clue.clusterer(0.4, 5, 1.2) @@ -13,6 +14,7 @@ def test_flat_kernel_except(): with pytest.raises(ValueError): clust.choose_kernel('flat', [1., 2.]) + def test_gaussian_kernel_except(): clust = clue.clusterer(0.4, 5, 1.2) clust.read_data(clue.test_blobs(1000, 2)) @@ -23,6 +25,7 @@ def test_gaussian_kernel_except(): with pytest.raises(ValueError): clust.choose_kernel('gaus', [1.]) + def test_exponential_kernel_except(): clust = clue.clusterer(0.4, 5, 1.2) clust.read_data(clue.test_blobs(1000, 2)) @@ -33,6 +36,7 @@ def test_exponential_kernel_except(): with pytest.raises(ValueError): clust.choose_kernel('exp', [1., 2., 3.]) + def test_custom_kernel_except(): clust = clue.clusterer(0.4, 5, 1.2) clust.read_data(clue.test_blobs(1000, 2)) @@ -41,6 +45,7 @@ def test_custom_kernel_except(): with pytest.raises(ValueError): clust.choose_kernel('custom', [1., 2.]) + def test_inexistent_kernel_except(): clust = clue.clusterer(0.4, 5, 1.2) clust.read_data(clue.test_blobs(1000, 2)) diff --git a/tests/test_moons_dataset.py b/tests/test_moons_dataset.py index 119ec537..238937d3 100644 --- a/tests/test_moons_dataset.py +++ b/tests/test_moons_dataset.py @@ -1,23 +1,26 @@ +from filecmp import cmp +import CLUEstering as clue import os import pandas as pd import pytest import sys sys.path.insert(1, '../CLUEstering/') -import CLUEstering as clue -from filecmp import cmp + @pytest.fixture def moons(): return pd.read_csv("./test_datasets/moons.csv") + def test_circles_clustering(moons): # Check if the output file already exists and if it does, delete it if os.path.isfile('./moons_output.csv'): os.remove('./moons_output.csv') - c = clue.clusterer(0.5,5,1.) + c = clue.clusterer(0.5, 5, 1.) c.read_data(moons) c.run_clue() - c.to_csv('./','moons_output.csv') + c.to_csv('./', 'moons_output.csv') - assert cmp('./moons_output.csv', './test_datasets/truth_files/moons_1000_truth.csv') + assert cmp('./moons_output.csv', + './test_datasets/truth_files/moons_1000_truth.csv') diff --git a/tests/test_sissa_dataset.py b/tests/test_sissa_dataset.py index 2edd4e91..ba6de325 100644 --- a/tests/test_sissa_dataset.py +++ b/tests/test_sissa_dataset.py @@ -1,23 +1,26 @@ +from filecmp import cmp +import CLUEstering as clue import os import pandas as pd import pytest import sys sys.path.insert(1, '../CLUEstering/') -import CLUEstering as clue -from filecmp import cmp + @pytest.fixture def sissa(): return pd.read_csv("./test_datasets/sissa.csv") + def test_circles_clustering(sissa): # Check if the output file already exists and if it does, delete it if os.path.isfile('./sissa_output.csv'): os.remove('./sissa_output.csv') - c = clue.clusterer(0.4,5,1.) + c = clue.clusterer(0.4, 5, 1.) c.read_data(sissa) c.run_clue() - c.to_csv('./','sissa_output.csv') + c.to_csv('./', 'sissa_output.csv') - assert cmp('./sissa_output.csv', './test_datasets/truth_files/sissa_1000_truth.csv') + assert cmp('./sissa_output.csv', + './test_datasets/truth_files/sissa_1000_truth.csv') diff --git a/tests/test_test_blobs.py b/tests/test_test_blobs.py index d8c6e962..37064235 100644 --- a/tests/test_test_blobs.py +++ b/tests/test_test_blobs.py @@ -1,36 +1,42 @@ +import CLUEstering as clue import pytest import sys sys.path.insert(1, '../CLUEstering/') -import CLUEstering as clue + def test_except_1(): clust = clue.clusterer(0.4, 5., 1.2) - + with pytest.raises(ValueError): clust.read_data(clue.test_blobs(n_samples=1000, n_dim=2, x_max=-3.)) with pytest.raises(ValueError): clust.read_data(clue.test_blobs(n_samples=1000, n_dim=2, y_max=-2.)) with pytest.raises(ValueError): - clust.read_data(clue.test_blobs(n_samples=1000, n_dim=2, x_max=-3., y_max=-2.)) + clust.read_data(clue.test_blobs( + n_samples=1000, n_dim=2, x_max=-3., y_max=-2.)) + def test_except_2(): clust = clue.clusterer(0.4, 5., 1.2) - + with pytest.raises(ValueError): clust.read_data(clue.test_blobs(n_samples=1000, n_dim=2, n_blobs=-2)) + def test_except_3(): clust = clue.clusterer(0.4, 5., 1.2) - + with pytest.raises(ValueError): clust.read_data(clue.test_blobs(n_samples=1000, n_dim=2, sigma=-2.)) + def test_except_4(): clust = clue.clusterer(0.4, 5., 1.2) - + with pytest.raises(ValueError): clust.read_data(clue.test_blobs(n_samples=1000, n_dim=4)) + def test_successful_run(): # Since the blobs are randomly generated, it is not possible to precisely # predict the result of the clustering a priory diff --git a/tests/test_toydetector_dataset.py b/tests/test_toydetector_dataset.py index 457493e4..0825d171 100644 --- a/tests/test_toydetector_dataset.py +++ b/tests/test_toydetector_dataset.py @@ -1,23 +1,26 @@ +from filecmp import cmp +import CLUEstering as clue import os import pandas as pd import pytest import sys sys.path.insert(1, '../CLUEstering/') -import CLUEstering as clue -from filecmp import cmp + @pytest.fixture def toy_det(): return pd.read_csv("./test_datasets/toyDetector.csv") + def test_circles_clustering(toy_det): # Check if the output file already exists and if it does, delete it if os.path.isfile('./toy_det_output.csv'): os.remove('./toy_det_output.csv') - c = clue.clusterer(0.06,5,1.) + c = clue.clusterer(0.06, 5, 1.) c.read_data(toy_det) c.run_clue() - c.to_csv('./','toy_det_output.csv') + c.to_csv('./', 'toy_det_output.csv') - assert cmp('./toy_det_output.csv', './test_datasets/truth_files/toy_det_1000_truth.csv') + assert cmp('./toy_det_output.csv', + './test_datasets/truth_files/toy_det_1000_truth.csv')