Add docstrings for all the test modules and functions

cms-patatrack · Nov 2, 2023 · d1e5d59 · d1e5d59
1 parent 02d980c
commit d1e5d59
Show file tree

Hide file tree

Showing 11 changed files with 159 additions and 0 deletions.
diff --git a/tests/test_blob_dataset.py b/tests/test_blob_dataset.py
@@ -1,3 +1,8 @@
+'''
+Testing the algorithm on the blob dataset, a dataset where points are distributed to form
+round clusters
+'''
+
 from filecmp import cmp
 import CLUEstering as clue
 import numpy as np
@@ -10,10 +15,17 @@
 
 @pytest.fixture
 def blobs():
+    '''
+    Returns the dataframe containing the blob dataset
+    '''
     return pd.read_csv("./test_datasets/blob.csv")
 
 
 def test_blobs_clustering(blobs):
+    '''
+    Checks that the output of the clustering is the one given by the truth dataset
+    '''
+
     # Check if the output file already exists and if it does, delete it
     if os.path.isfile('./blobs_output.csv'):
         os.remove('./blobs_output.csv')

diff --git a/tests/test_change_domains.py b/tests/test_change_domains.py
@@ -1,3 +1,7 @@
+'''
+Testing the function for changing the domain ranges, using the blob dataset as a reference
+'''
+
 from math import pi
 import CLUEstering as clue
 import numpy as np
@@ -8,11 +12,17 @@
 
 @pytest.fixture
 def blob():
+    '''
+    Returns the path to the csv file containing the blob dataset
+    '''
     csv_file = './test_datasets/blob.csv'
     return csv_file
 
 
 def test_default_domains(blob):
+    '''
+    Check the values of the default domain ranges
+    '''
     clust = clue.clusterer(0.5, 5., 1.2)
     clust.read_data(blob)
 
@@ -25,6 +35,9 @@ def test_default_domains(blob):
 
 
 def test_change_domains_1():
+    '''
+    Check the renormalization for uniform data
+    '''
     # We generate data with zero mean and standard deviation, so that the
     # domain extremes are not normalized by the standard scaler
     x0 = np.zeros(shape=5)
@@ -55,6 +68,9 @@ def test_change_domains_1():
 
 
 def test_change_domains_2():
+    '''
+    Check the renormalization for non-uniform data
+    '''
     # We generate data with non-zero mean and standard deviation, and we check
     # that the domain extremes are re-calculated as expected by the scaler
     x0 = np.arange(0, 5)

diff --git a/tests/test_circles_dataset.py b/tests/test_circles_dataset.py
@@ -1,3 +1,8 @@
+'''
+Testing the algorithm on the circle dataset, a dataset where points are distributed to form
+two concentric circles
+'''
+
 from filecmp import cmp
 import CLUEstering as clue
 import numpy as np
@@ -10,10 +15,17 @@
 
 @pytest.fixture
 def circles():
+    '''
+    Returns the dataframe containing the circle dataset
+    '''
     return pd.read_csv("./test_datasets/circles.csv")
 
 
 def test_circles_clustering(circles):
+    '''
+    Checks that the output of the clustering is the one given by the truth dataset
+    '''
+
     # Check if the output file already exists and if it does, delete it
     if os.path.isfile('./circles_output.csv'):
         os.remove('./circles_output.csv')

diff --git a/tests/test_clusterer_equality.py b/tests/test_clusterer_equality.py
@@ -1,3 +1,7 @@
+'''
+Test that the equality operator for clusterer objects works correctly
+'''
+
 import CLUEstering as clue
 import pandas as pd
 import pytest
@@ -7,15 +11,24 @@
 
 @pytest.fixture
 def moons():
+    '''
+    Returns the dataframe containing the moon dataset
+    '''
     return pd.read_csv("./test_datasets/moons.csv")
 
 
 @pytest.fixture
 def circles():
+    '''
+    Returns the dataframe containing the circle dataset
+    '''
     return pd.read_csv("./test_datasets/circles.csv")
 
 
 def test_clusterer_equality(moons, circles):
+    '''
+    Test the equality operator for clusterer objects
+    '''
     # Moons dataset
     clust1 = clue.clusterer(0.5, 5, 1.)
     clust1.read_data(moons)

diff --git a/tests/test_domain_extremes.py b/tests/test_domain_extremes.py
@@ -1,3 +1,7 @@
+'''
+Test that points at opposite extremes of a finite domain are adjacent
+'''
+
 from math import pi
 import CLUEstering as clue
 import pandas as pd
@@ -8,10 +12,16 @@
 
 @pytest.fixture
 def opposite_angles():
+    '''
+    Returns a dataset with points distributed at opposite sides of a finite range
+    '''
     return pd.read_csv("./test_datasets/opposite_angles.csv")
 
 
 def test_opposite_angles(opposite_angles):
+    '''
+    Test the clustering of points at opposite angles
+    '''
     # Test points with angles distributed at opposite extremes of the domain
     # This test ensures that the code works for data with periodic coordinates
     clust = clue.clusterer(0.1, 1, 1.1)

diff --git a/tests/test_input_datatypes.py b/tests/test_input_datatypes.py
@@ -1,3 +1,7 @@
+'''
+Testing the algorithm with all the supported input data types
+'''
+
 import CLUEstering as clue
 import numpy as np
 import pandas as pd
@@ -7,6 +11,9 @@
 
 
 def test_read_array_except():
+    '''
+    Test the exception raised when passing incorrect arrays
+    '''
     arr = np.array([[1, 4, 5]])
     clust = clue.clusterer(0.4, 5., 1.2)
 
@@ -15,6 +22,9 @@ def test_read_array_except():
 
 
 def test_read_string_except():
+    '''
+    Test the exception raised when passing incorrect data files
+    '''
     clust = clue.clusterer(0.4, 5., 1.2)
 
     with pytest.raises(ValueError):
@@ -23,6 +33,9 @@ def test_read_string_except():
 
 @pytest.fixture
 def no_weight_dataset():
+    '''
+    Returns a dataset with no weight associated with the points
+    '''
     x0 = np.array([0, 1, 2, 3, 4])
     x1 = np.array([5, 6, 7, 8, 9])
     x2 = np.array([10, 11, 12, 13, 14])
@@ -33,6 +46,9 @@ def no_weight_dataset():
 
 @pytest.fixture
 def low_dimensionality_dataset():
+    '''
+    Returns a dataset with no coordinates
+    '''
     weight = np.array([1, 1, 1, 1, 1])
     data = {'weight': weight}
 
@@ -41,6 +57,9 @@ def low_dimensionality_dataset():
 
 @pytest.fixture
 def high_dimensionality_dataset():
+    '''
+    Returns an 11-dimensional dataset
+    '''
     x0 = np.array([0, 1, 2, 3, 4])
     x1 = np.array([0, 1, 2, 3, 4])
     x2 = np.array([0, 1, 2, 3, 4])
@@ -62,6 +81,9 @@ def high_dimensionality_dataset():
 def test_handle_dataframe_except(no_weight_dataset,
                                  low_dimensionality_dataset,
                                  high_dimensionality_dataset):
+    '''
+    Test the error handling when passing incorrect dataframes
+    '''
     clust = clue.clusterer(0.5, 5., 1.)
 
     with pytest.raises(ValueError):
@@ -74,19 +96,28 @@ def test_handle_dataframe_except(no_weight_dataset,
 
 @pytest.fixture
 def file():
+    '''
+    Returns the path to a test csv file
+    '''
     csv_file = './test_datasets/blob.csv'
     return csv_file
 
 
 @pytest.fixture
 def dataframe():
+    '''
+    Returns the dataframe of a test dataset
+    '''
     csv_file = './test_datasets/blob.csv'
     df_ = pd.read_csv(csv_file)
     return df_
 
 
 @pytest.fixture
 def dictionary(dataframe):
+    '''
+    Returns a test dataset as dictionary
+    '''
     data_dict = {'x0': dataframe['x0'].values.tolist(),
                  'x1': dataframe['x1'].values.tolist(),
                  'x2': dataframe['x2'].values.tolist(),
@@ -96,6 +127,9 @@ def dictionary(dataframe):
 
 @pytest.fixture
 def lists(dataframe):
+    '''
+    Returns a test dataset as a list of lists
+    '''
     data_lists = [dataframe['x0'].values.tolist(),
                   dataframe['x1'].values.tolist(),
                   dataframe['x2'].values.tolist(),
@@ -105,6 +139,9 @@ def lists(dataframe):
 
 @pytest.fixture
 def arrays(dataframe):
+    '''
+    Returns a test dataset as an array of arrays
+    '''
     data_arrays = np.array([np.array(dataframe['x0'].values.tolist()),
                             np.array(dataframe['x1'].values.tolist()),
                             np.array(dataframe['x2'].values.tolist()),

diff --git a/tests/test_kernels.py b/tests/test_kernels.py
@@ -1,10 +1,17 @@
+'''
+Test of the convolutional kernels
+'''
+
 import CLUEstering as clue
 import pytest
 import sys
 sys.path.insert(1, '../CLUEstering/')
 
 
 def test_flat_kernel_except():
+    '''
+    Test the exceptions raised by the flat kernel
+    '''
     clust = clue.clusterer(0.4, 5, 1.2)
     clust.read_data(clue.test_blobs(1000, 2))
 
@@ -16,6 +23,9 @@ def test_flat_kernel_except():
 
 
 def test_gaussian_kernel_except():
+    '''
+    Test the exceptions raised by the gaussian kernel
+    '''
     clust = clue.clusterer(0.4, 5, 1.2)
     clust.read_data(clue.test_blobs(1000, 2))
 
@@ -27,6 +37,9 @@ def test_gaussian_kernel_except():
 
 
 def test_exponential_kernel_except():
+    '''
+    Test the exceptions raised by the exponential kernel
+    '''
     clust = clue.clusterer(0.4, 5, 1.2)
     clust.read_data(clue.test_blobs(1000, 2))
 
@@ -38,6 +51,9 @@ def test_exponential_kernel_except():
 
 
 def test_custom_kernel_except():
+    '''
+    Test the exceptions raised by the custom kernel
+    '''
     clust = clue.clusterer(0.4, 5, 1.2)
     clust.read_data(clue.test_blobs(1000, 2))
 
@@ -47,6 +63,9 @@ def test_custom_kernel_except():
 
 
 def test_inexistent_kernel_except():
+    '''
+    Test the exceptions raised when choosing a nonexistent kernel
+    '''
     clust = clue.clusterer(0.4, 5, 1.2)
     clust.read_data(clue.test_blobs(1000, 2))
 

diff --git a/tests/test_moons_dataset.py b/tests/test_moons_dataset.py
@@ -1,3 +1,8 @@
+'''
+Testing the algorithm on the moon dataset, a dataset where points are distributed to form
+two moon shaped clusters
+'''
+
 from filecmp import cmp
 import CLUEstering as clue
 import os
@@ -9,10 +14,17 @@
 
 @pytest.fixture
 def moons():
+    '''
+    Returns the dataframe containing the moon dataset
+    '''
     return pd.read_csv("./test_datasets/moons.csv")
 
 
 def test_circles_clustering(moons):
+    '''
+    Checks that the output of the clustering is the one given by the truth dataset
+    '''
+
     # Check if the output file already exists and if it does, delete it
     if os.path.isfile('./moons_output.csv'):
         os.remove('./moons_output.csv')

diff --git a/tests/test_sissa_dataset.py b/tests/test_sissa_dataset.py
@@ -1,3 +1,8 @@
+'''
+Testing the algorithm on the sissa dataset, a dataset where points are distributed to form
+many small clusters
+'''
+
 from filecmp import cmp
 import CLUEstering as clue
 import os
@@ -9,10 +14,17 @@
 
 @pytest.fixture
 def sissa():
+    '''
+    Returns the dataframe containing the sissa dataset
+    '''
     return pd.read_csv("./test_datasets/sissa.csv")
 
 
 def test_circles_clustering(sissa):
+    '''
+    Checks that the output of the clustering is the one given by the truth dataset
+    '''
+
     # Check if the output file already exists and if it does, delete it
     if os.path.isfile('./sissa_output.csv'):
         os.remove('./sissa_output.csv')

diff --git a/tests/test_test_blobs.py b/tests/test_test_blobs.py
@@ -1,3 +1,7 @@
+'''
+Test the test_blobs function, which produces a set of Gaussian-distributed blobs
+'''
+
 import CLUEstering as clue
 import pytest
 import sys