Implement review suggestions

cschlaffner · Oct 31, 2023 · e4b002a · e4b002a
1 parent 5079deb
commit e4b002a
Show file tree

Hide file tree

Showing 4 changed files with 30 additions and 30 deletions.
diff --git a/protzilla/data_analysis/clustering.py b/protzilla/data_analysis/clustering.py
@@ -47,22 +47,22 @@ def k_means(
     :type model_selection: str
     :param scoring: The scoring metric(s) used for model evaluation.
     :type scoring: list[str]
-    :param n_clusters: the number of clusters to form as well as the number of \
+    :param n_clusters: the number of clusters to form as well as the number of 
         centroids to generate.
     :type n_clusters: int
     :param random_state: Determines random number generation for centroid initialization
     :type random_state: int
-    :param init_centroid_strategy: method for centroid initialization. Possible methods\
+    :param init_centroid_strategy: method for centroid initialization. Possible methods
         are: k-means++ and random
     :type init_centroid_strategy: str
-    :param n_init: Number of times the k-means algorithm is run with different centroid\
+    :param n_init: Number of times the k-means algorithm is run with different centroid
         seeds.
     :type n_init: int
-    :param max_iter: Maximum number of iterations of the k-means algorithm for a single\
+    :param max_iter: Maximum number of iterations of the k-means algorithm for a single
         run.
     :type max_iter: int
-    :param tolerance: Relative tolerance with regards to Frobenius norm of the \
-        difference in the cluster centers of two consecutive iterations to declare\
+    :param tolerance: Relative tolerance with regards to Frobenius norm of the 
+        difference in the cluster centers of two consecutive iterations to declare
         convergence.
     :type tolerance: float
     :returns: A dictionary containing the following elements:

diff --git a/protzilla/data_analysis/differential_expression_t_test.py b/protzilla/data_analysis/differential_expression_t_test.py
@@ -46,8 +46,8 @@ def t_test(
     :param alpha: the alpha value for the t-test
     :type alpha: float
     :param fc_threshold: threshold for the abs(log_2(fold_change)) (vertical line in a volcano plot).
-        Only proteins with a larger abs(log_2(fold_change)) are seen as differentially expressed
-    :type fc-threshold: int
+        Only proteins with a larger abs(log_2(fold_change)) than the fc_threshold are seen as differentially expressed
+    :type fc_threshold: float
     :param log_base: in case the data was previously log transformed this parameter contains the base (e.g. 2 if the data was log_2 transformed).
          If the data was not log transformed the parameter should be ""
     :type log_base: int/str

diff --git a/protzilla/data_analysis/dimension_reduction.py b/protzilla/data_analysis/dimension_reduction.py
@@ -17,10 +17,10 @@ def t_sne(
     method: str = "barnes_hut",
 ):
     """
-    A function that uses t-SNE to reduce the dimension of a dataframe and returns a \
+    A function that uses t-SNE to reduce the dimension of a dataframe and returns a 
     dataframe in wide format with the entered number of components.
-    Please note that this function is a simplified version of t-SNE, and it only \
-    enables you to adjust the most significant parameters that affect the output. \
+    Please note that this function is a simplified version of t-SNE, and it only 
+    enables you to adjust the most significant parameters that affect the output. 
     You can find the default values for the non-adjustable parameters here:
     https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html
 
@@ -30,25 +30,25 @@ def t_sne(
     :type n_components: int
     :param perplexity: the perplexity is related to the number of nearest neighbors
     :type perplexity: float
-    :param metric: The metric to use when calculating distance between instances in a \
+    :param metric: The metric to use when calculating distance between instances in a 
         feature array. Possible metrics are: euclidean, manhattan, cosine and haversine
     :type metric: str
     :param random_state: determines the random number generator.
     :type random_state: int
     :param n_iter: maximum number of iterations for the optimization
     :type n_iter: int
-    :param n_iter_without_progress: Maximum number of iterations without progress \
-        before we abort the optimization, used after 250 initial iterations with early \
-        exaggeration. Note that progress is only checked every 50 iterations so this \
+    :param n_iter_without_progress: Maximum number of iterations without progress 
+        before we abort the optimization, used after 250 initial iterations with early 
+        exaggeration. Note that progress is only checked every 50 iterations so this 
         value is rounded to the next multiple of 50.
     :type n_iter_without_progress: int
-    :param method: the method exact will run on the slower, but exact, algorithm in \
-        O(N^2) time. However, the exact method cannot scale to millions of examples. \
+    :param method: the method exact will run on the slower, but exact, algorithm in 
+        O(N^2) time. However, the exact method cannot scale to millions of examples. 
         Barnes-Hut approximation will run faster, but not exact, in O(NlogN) time.
     :type method: str
-    :return: a dictionary with a single key, "embedded_data," which contains a new
+    :return: a dictionary with a single key, "embedded_data", which contains a new
         DataFrame in wide format. This DataFrame consists of the t-SNE embedded data
-        with two columns, "Component1" and "Component2," and shares the same index as
+        with two columns, "Component1" and "Component2", and shares the same index as
         the input_df.
     :rtype: dict
     """
@@ -114,36 +114,36 @@ def umap(
     transform_seed: int = 42,
 ):
     """
-    A function that uses UMAP to reduce the dimension of a dataframe and returns a \
+    A function that uses UMAP to reduce the dimension of a dataframe and returns a 
     dataframe in wide format with the entered number of components.
-    Please note that this function is a simplified version of UMAP, and it only \
-    enables you to adjust the most significant parameters that affect the output. \
+    Please note that this function is a simplified version of UMAP, and it only 
+    enables you to adjust the most significant parameters that affect the output. 
     You can find the default values for the non-adjustable parameters here:
     https://umap-learn.readthedocs.io/en/latest/api.html
 
     :param input_df: the dataframe, whose dimensions should be reduced.
     :type input_df: pd.DataFrame
     :param n_components: The dimension of the space to embed into.
     :type n_components: int
-    :param n_neighbors: The size of local neighborhood in terms of number of \
+    :param n_neighbors: The size of local neighborhood in terms of number of 
         neighboring sample points
     :type n_neighbors: float
-    :param min_dist: the effective minimum distance between embedded points. Smaller \
-        values will result in a more clustered/clumped embedding where nearby points on \
-        the manifold are drawn closer together, while larger values will result on a more \
+    :param min_dist: the effective minimum distance between embedded points. Smaller 
+        values will result in a more clustered/clumped embedding where nearby points on 
+        the manifold are drawn closer together, while larger values will result in a more 
         even dispersal of points.
     :type min_dist: float
-    :param metric: The metric to use when calculating distance between instances in a \
+    :param metric: The metric to use when calculating distance between instances in a 
         feature array.
     :type metric: str
     :param random_state: determines the random number generator.
     :type random_state: int
     :param transform_seed: Random seed used for the stochastic aspects of the transform
         operation.
     :type transform_seed: int
-    :return: a dictionary with a single key, "embedded_data," which contains a new
+    :return: a dictionary with a single key, "embedded_data", which contains a new
         DataFrame in wide format. This DataFrame consists of the UMAP embedded data
-        with two columns, "Component1" and "Component2," and shares the same index as
+        with two columns, "Component1" and "Component2", and shares the same index as
         the input_df.
     :rtype: dict
     """

diff --git a/protzilla/data_analysis/plots.py b/protzilla/data_analysis/plots.py
@@ -16,7 +16,7 @@ def scatter_plot(
     Function to create a scatter plot from data.
 
     :param input_df: the dataframe that should be plotted. It should have either 2
-        or 3 dimension
+        or 3 dimensions
     :type input_df: pd.DataFrame
     :param color_df: the DataFrame with one column according to which the marks should
         be colored. This is an optional parameter