Skip to content

Commit

Permalink
Add docstrings for all the test modules and functions
Browse files Browse the repository at this point in the history
  • Loading branch information
sbaldu committed Nov 2, 2023
1 parent 02d980c commit d1e5d59
Show file tree
Hide file tree
Showing 11 changed files with 159 additions and 0 deletions.
12 changes: 12 additions & 0 deletions tests/test_blob_dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
'''
Testing the algorithm on the blob dataset, a dataset where points are distributed to form
round clusters
'''

from filecmp import cmp
import CLUEstering as clue
import numpy as np
Expand All @@ -10,10 +15,17 @@

@pytest.fixture
def blobs():
'''
Returns the dataframe containing the blob dataset
'''
return pd.read_csv("./test_datasets/blob.csv")


def test_blobs_clustering(blobs):
'''
Checks that the output of the clustering is the one given by the truth dataset
'''

# Check if the output file already exists and if it does, delete it
if os.path.isfile('./blobs_output.csv'):
os.remove('./blobs_output.csv')
Expand Down
16 changes: 16 additions & 0 deletions tests/test_change_domains.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
'''
Testing the function for changing the domain ranges, using the blob dataset as a reference
'''

from math import pi
import CLUEstering as clue
import numpy as np
Expand All @@ -8,11 +12,17 @@

@pytest.fixture
def blob():
'''
Returns the path to the csv file containing the blob dataset
'''
csv_file = './test_datasets/blob.csv'
return csv_file


def test_default_domains(blob):
'''
Check the values of the default domain ranges
'''
clust = clue.clusterer(0.5, 5., 1.2)
clust.read_data(blob)

Expand All @@ -25,6 +35,9 @@ def test_default_domains(blob):


def test_change_domains_1():
'''
Check the renormalization for uniform data
'''
# We generate data with zero mean and standard deviation, so that the
# domain extremes are not normalized by the standard scaler
x0 = np.zeros(shape=5)
Expand Down Expand Up @@ -55,6 +68,9 @@ def test_change_domains_1():


def test_change_domains_2():
'''
Check the renormalization for non-uniform data
'''
# We generate data with non-zero mean and standard deviation, and we check
# that the domain extremes are re-calculated as expected by the scaler
x0 = np.arange(0, 5)
Expand Down
12 changes: 12 additions & 0 deletions tests/test_circles_dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
'''
Testing the algorithm on the circle dataset, a dataset where points are distributed to form
two concentric circles
'''

from filecmp import cmp
import CLUEstering as clue
import numpy as np
Expand All @@ -10,10 +15,17 @@

@pytest.fixture
def circles():
'''
Returns the dataframe containing the circle dataset
'''
return pd.read_csv("./test_datasets/circles.csv")


def test_circles_clustering(circles):
'''
Checks that the output of the clustering is the one given by the truth dataset
'''

# Check if the output file already exists and if it does, delete it
if os.path.isfile('./circles_output.csv'):
os.remove('./circles_output.csv')
Expand Down
13 changes: 13 additions & 0 deletions tests/test_clusterer_equality.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
'''
Test that the equality operator for clusterer objects works correctly
'''

import CLUEstering as clue
import pandas as pd
import pytest
Expand All @@ -7,15 +11,24 @@

@pytest.fixture
def moons():
'''
Returns the dataframe containing the moon dataset
'''
return pd.read_csv("./test_datasets/moons.csv")


@pytest.fixture
def circles():
'''
Returns the dataframe containing the circle dataset
'''
return pd.read_csv("./test_datasets/circles.csv")


def test_clusterer_equality(moons, circles):
'''
Test the equality operator for clusterer objects
'''
# Moons dataset
clust1 = clue.clusterer(0.5, 5, 1.)
clust1.read_data(moons)
Expand Down
10 changes: 10 additions & 0 deletions tests/test_domain_extremes.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
'''
Test that points at opposite extremes of a finite domain are adjacent
'''

from math import pi
import CLUEstering as clue
import pandas as pd
Expand All @@ -8,10 +12,16 @@

@pytest.fixture
def opposite_angles():
'''
Returns a dataset with points distributed at opposite sides of a finite range
'''
return pd.read_csv("./test_datasets/opposite_angles.csv")


def test_opposite_angles(opposite_angles):
'''
Test the clustering of points at opposite angles
'''
# Test points with angles distributed at opposite extremes of the domain
# This test ensures that the code works for data with periodic coordinates
clust = clue.clusterer(0.1, 1, 1.1)
Expand Down
37 changes: 37 additions & 0 deletions tests/test_input_datatypes.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
'''
Testing the algorithm with all the supported input data types
'''

import CLUEstering as clue
import numpy as np
import pandas as pd
Expand All @@ -7,6 +11,9 @@


def test_read_array_except():
'''
Test the exception raised when passing incorrect arrays
'''
arr = np.array([[1, 4, 5]])
clust = clue.clusterer(0.4, 5., 1.2)

Expand All @@ -15,6 +22,9 @@ def test_read_array_except():


def test_read_string_except():
'''
Test the exception raised when passing incorrect data files
'''
clust = clue.clusterer(0.4, 5., 1.2)

with pytest.raises(ValueError):
Expand All @@ -23,6 +33,9 @@ def test_read_string_except():

@pytest.fixture
def no_weight_dataset():
'''
Returns a dataset with no weight associated to the points
'''
x0 = np.array([0, 1, 2, 3, 4])
x1 = np.array([5, 6, 7, 8, 9])
x2 = np.array([10, 11, 12, 13, 14])
Expand All @@ -33,6 +46,9 @@ def no_weight_dataset():

@pytest.fixture
def low_dimensionality_dataset():
'''
Returns a dataset with no coordinates
'''
weight = np.array([1, 1, 1, 1, 1])
data = {'weight': weight}

Expand All @@ -41,6 +57,9 @@ def low_dimensionality_dataset():

@pytest.fixture
def high_dimensionality_dataset():
'''
Returns an 11-dimensional dataset
'''
x0 = np.array([0, 1, 2, 3, 4])
x1 = np.array([0, 1, 2, 3, 4])
x2 = np.array([0, 1, 2, 3, 4])
Expand All @@ -62,6 +81,9 @@ def high_dimensionality_dataset():
def test_handle_dataframe_except(no_weight_dataset,
low_dimensionality_dataset,
high_dimensionality_dataset):
'''
Test the error handling when passing incorrect dataframes
'''
clust = clue.clusterer(0.5, 5., 1.)

with pytest.raises(ValueError):
Expand All @@ -74,19 +96,28 @@ def test_handle_dataframe_except(no_weight_dataset,

@pytest.fixture
def file():
'''
Returns the path to a test csv file
'''
csv_file = './test_datasets/blob.csv'
return csv_file


@pytest.fixture
def dataframe():
'''
Returns the dataframe of a test dataset
'''
csv_file = './test_datasets/blob.csv'
df_ = pd.read_csv(csv_file)
return df_


@pytest.fixture
def dictionary(dataframe):
'''
Returns a test dataset as dictionary
'''
data_dict = {'x0': dataframe['x0'].values.tolist(),
'x1': dataframe['x1'].values.tolist(),
'x2': dataframe['x2'].values.tolist(),
Expand All @@ -96,6 +127,9 @@ def dictionary(dataframe):

@pytest.fixture
def lists(dataframe):
'''
Returns a test dataset as a list of lists
'''
data_lists = [dataframe['x0'].values.tolist(),
dataframe['x1'].values.tolist(),
dataframe['x2'].values.tolist(),
Expand All @@ -105,6 +139,9 @@ def lists(dataframe):

@pytest.fixture
def arrays(dataframe):
'''
Returns a test dataset as an array of arrays
'''
data_arrays = np.array([np.array(dataframe['x0'].values.tolist()),
np.array(dataframe['x1'].values.tolist()),
np.array(dataframe['x2'].values.tolist()),
Expand Down
19 changes: 19 additions & 0 deletions tests/test_kernels.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
'''
Test of the convolutional kernels
'''

import CLUEstering as clue
import pytest
import sys
sys.path.insert(1, '../CLUEstering/')


def test_flat_kernel_except():
'''
Test the exceptions raised by the flat kernel
'''
clust = clue.clusterer(0.4, 5, 1.2)
clust.read_data(clue.test_blobs(1000, 2))

Expand All @@ -16,6 +23,9 @@ def test_flat_kernel_except():


def test_gaussian_kernel_except():
'''
Test the exceptions raised by the gaussian kernel
'''
clust = clue.clusterer(0.4, 5, 1.2)
clust.read_data(clue.test_blobs(1000, 2))

Expand All @@ -27,6 +37,9 @@ def test_gaussian_kernel_except():


def test_exponential_kernel_except():
'''
Test the exceptions raised by the exponential kernel
'''
clust = clue.clusterer(0.4, 5, 1.2)
clust.read_data(clue.test_blobs(1000, 2))

Expand All @@ -38,6 +51,9 @@ def test_exponential_kernel_except():


def test_custom_kernel_except():
'''
Test the exceptions raised by the custom kernel
'''
clust = clue.clusterer(0.4, 5, 1.2)
clust.read_data(clue.test_blobs(1000, 2))

Expand All @@ -47,6 +63,9 @@ def test_custom_kernel_except():


def test_inexistent_kernel_except():
'''
Test the exceptions raised when choosing a nonexistent kernel
'''
clust = clue.clusterer(0.4, 5, 1.2)
clust.read_data(clue.test_blobs(1000, 2))

Expand Down
12 changes: 12 additions & 0 deletions tests/test_moons_dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
'''
Testing the algorithm on the moons dataset, a dataset where points are distributed to form
two moon-shaped clusters
'''

from filecmp import cmp
import CLUEstering as clue
import os
Expand All @@ -9,10 +14,17 @@

@pytest.fixture
def moons():
'''
Returns the dataframe containing the moon dataset
'''
return pd.read_csv("./test_datasets/moons.csv")


def test_circles_clustering(moons):
'''
Checks that the output of the clustering is the one given by the truth dataset
'''

# Check if the output file already exists and if it does, delete it
if os.path.isfile('./moons_output.csv'):
os.remove('./moons_output.csv')
Expand Down
12 changes: 12 additions & 0 deletions tests/test_sissa_dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
'''
Testing the algorithm on the sissa dataset, a dataset where points are distributed to form
many small clusters
'''

from filecmp import cmp
import CLUEstering as clue
import os
Expand All @@ -9,10 +14,17 @@

@pytest.fixture
def sissa():
'''
Returns the dataframe containing the sissa dataset
'''
return pd.read_csv("./test_datasets/sissa.csv")


def test_circles_clustering(sissa):
'''
Checks that the output of the clustering is the one given by the truth dataset
'''

# Check if the output file already exists and if it does, delete it
if os.path.isfile('./sissa_output.csv'):
os.remove('./sissa_output.csv')
Expand Down
4 changes: 4 additions & 0 deletions tests/test_test_blobs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
'''
Test the test_blobs function, which produces a set of Gaussian-distributed blobs
'''

import CLUEstering as clue
import pytest
import sys
Expand Down
Loading

0 comments on commit d1e5d59

Please sign in to comment.