remove \ in data_preprocessing docstrings

cschlaffner · Oct 31, 2023 · d3a7943 · d3a7943
1 parent e4b002a
commit d3a7943
Show file tree

Hide file tree

Showing 7 changed files with 60 additions and 59 deletions.
diff --git a/protzilla/data_preprocessing/filter_proteins.py b/protzilla/data_preprocessing/filter_proteins.py
@@ -8,15 +8,15 @@ def by_samples_missing(intensity_df, percentage):
     This function filters proteins based on its amount of nan values.
     If the percentage of existing values is below a threshold (percentage), the protein is filtered out.
 
-    :param df: the intensity dataframe that should be filtered\
+    :param intensity_df: the intensity dataframe that should be filtered
         in long format
     :type intensity_df: pd.DataFrame
-    :param percentage: float ranging from 0 to 1. Defining the\
-        relative share of samples the proteins should be present in inorder to be kept.\
+    :param percentage: float ranging from 0 to 1, defining the
+        relative share of samples a protein must be present in, in order to be kept.
     :type percentage: float
     
-    :return: returns the filtered df as a Dataframe and a dict with a listof Protein IDs\
-        that were discarded and a list of Protein IDs\
+    :return: returns the filtered df as a DataFrame and a dict with a list of Protein IDs
+        that were discarded and a list of Protein IDs
         that were kept
     :rtype: Tuple[pandas DataFrame, dict]
     """

diff --git a/protzilla/data_preprocessing/imputation.py b/protzilla/data_preprocessing/imputation.py
@@ -29,17 +29,18 @@ def by_knn(
     Implements an instance of the sklearn.impute KNNImputer
     class.
     https://scikit-learn.org/stable/modules/generated/sklearn.impute.KNNImputer.html
-    :param intensity_df: the dataframe that should be filtered in\
-    long format
+
+    :param intensity_df: the dataframe that should be filtered in
+        long format
     :type intensity_df: pandas DataFrame
-    :param number_of_neighbours: number of neighbouring samples used for\
-    imputation. Default: 5
+    :param number_of_neighbours: number of neighbouring samples used for
+        imputation. Default: 5
     :type number_of_neighbours: int
-    :param **kwargs: additional keyword arguments passed to\
+    :param **kwargs: additional keyword arguments passed to
         KNNImputer.fit_transform
     :type kwargs: dict
-    :return: returns an imputed dataframe in typical protzilla long format\
-    and an empty dict
+    :return: returns an imputed dataframe in typical protzilla long format
+        and an empty dict
     :rtype: Tuple[pd.DataFrame, dict]
     """
 
@@ -74,14 +75,15 @@ def by_simple_imputer(
     no data will be imputed. This function automatically filters
     out such proteins from the DataFrame beforehand.
 
-    :param intensity_df: the dataframe that should be filtered in\
-    long format
+    :param intensity_df: the dataframe that should be filtered in
+        long format
     :type intensity_df: pandas DataFrame
-    :param strategy: Defines the imputation strategy. Can be "mean",\
-    "median" or "most_frequent" (for mode).
+    :param strategy: Defines the imputation strategy. Can be "mean",
+        "median" or "most_frequent" (for mode).
     :type strategy: str
-    :return: returns an imputed dataframe in typical protzilla long format\
-    and an empty dict
+
+    :return: returns an imputed dataframe in typical protzilla long format
+        and an empty dict
     :rtype: pd.DataFrame, dict
     """
     assert strategy in ["mean", "median", "most_frequent"]
@@ -157,16 +159,16 @@ def by_min_per_protein(
     take a fraction of that minimum value for imputation.
     CAVE: All proteins without any values will be filtered out.
 
-    :param intensity_df: the dataframe that should be filtered in\
+    :param intensity_df: the dataframe that should be filtered in
         long format
     :type intensity_df: pandas DataFrame
-    :param shrinking_value: a factor to alter the minimum value\
-        used for imputation. With a shrinking factor of 0.1 for\
-        example, a tenth of the minimum value found will be used for\
+    :param shrinking_value: a factor to alter the minimum value
+        used for imputation. With a shrinking factor of 0.1 for
+        example, a tenth of the minimum value found will be used for
         imputation. Default: 1 (no shrinking)
     :type shrinking_value: float
 
-    :return: returns an imputed dataframe in typical protzilla long format\
+    :return: returns an imputed dataframe in typical protzilla long format
         and an empty dict
     :rtype: pd.DataFrame, dict
     """
@@ -202,16 +204,16 @@ def by_min_per_dataset(
     the dataframe. The user can also assign a shrinking factor to
     take a fraction of that minimum value for imputation.
 
-    :param intensity_df: the dataframe that should be filtered in\
+    :param intensity_df: the dataframe that should be filtered in
         long format
     :type intensity_df: pandas DataFrame
-    :param shrinking_value: a factor to alter the minimum value\
-        used for imputation. With a shrinking factor of 0.1 for\
-        example, a tenth of the minimum value found will be used for\
+    :param shrinking_value: a factor to alter the minimum value
+        used for imputation. With a shrinking factor of 0.1 for
+        example, a tenth of the minimum value found will be used for
         imputation. Default: 1 (no shrinking)
     :type shrinking_value: float
     
-    :return: returns an imputed dataframe in typical protzilla long format\
+    :return: returns an imputed dataframe in typical protzilla long format
         and an empty dict
     :rtype: pd.DataFrame, dict
     """
@@ -279,7 +281,6 @@ def _build_box_hist_plot(
 
     2. a graph summarising the amount of
     filtered proteins.
-
     """
     if graph_type == "Boxplot":
         fig1 = create_box_plots(

diff --git a/protzilla/data_preprocessing/normalisation.py b/protzilla/data_preprocessing/normalisation.py
@@ -14,11 +14,11 @@ def by_z_score(intensity_df: pd.DataFrame) -> tuple[pd.DataFrame, dict]:
     Scales the data to zero mean and unit variance. This is often also
     called z-score normalisation/transformation.
 
-    :param intensity_df: the dataframe that should be filtered in\
+    :param intensity_df: the dataframe that should be filtered in
         long format
     :type intensity_df: pd.DataFrame
 
-    :return: returns a scaled dataframe in typical protzilla long format and an empty\
+    :return: returns a scaled dataframe in typical protzilla long format and an empty
         dictionary
     :rtype: Tuple[pandas DataFrame, dict]
     """
@@ -56,14 +56,14 @@ def by_median(
     Divides each intensity by the chosen intensity quantile of the
     respective sample. By default, the median (50%-quantile) is used.
 
-    :param intensity_df: the dataframe that should be filtered in\
+    :param intensity_df: the dataframe that should be filtered in
         long format
     :type intensity_df: pandas DataFrame
-    :param percentile: the chosen quartile of the sample intensities for\
+    :param percentile: the chosen quantile of the sample intensities for
         normalisation
     :type percentile: float
 
-    :return: returns a scaled dataframe in typical protzilla long format\
+    :return: returns a scaled dataframe in typical protzilla long format
         and a dict, containing all zeroed samples due to quantile being 0
     :rtype: Tuple[pandas DataFrame, dict]
     """
@@ -118,11 +118,11 @@ def by_totalsum(intensity_df: pd.DataFrame) -> tuple[pd.DataFrame, dict]:
     Normalises the data on the level of each sample.
     Divides each intensity by the total sum of sample intensities.
 
-    :param intensity_df: the dataframe that should be filtered in\
+    :param intensity_df: the dataframe that should be filtered in
         long format
     :type intensity_df: pandas DataFrame
 
-    :return: returns a scaled dataframe in typical protzilla long format\
+    :return: returns a scaled dataframe in typical protzilla long format
         and a dict, containing all zeroed samples due to sum being 0
     :rtype: Tuple[pandas DataFrame, dict]
     """
@@ -181,12 +181,12 @@ def by_reference_protein(
     protein in each sample. Samples where this value is zero will be
     removed and returned separately.
 
-    :param intensity_df: the dataframe that should be filtered in\
+    :param intensity_df: the dataframe that should be filtered in
         long format
     :type intensity_df: pandas DataFrame
     :param reference_protein: Protein ID of the protein to normalise by
     :type reference_protein: str
-    :return: returns a scaled dataframe in typical protzilla long format \
+    :return: returns a scaled dataframe in typical protzilla long format
         and dict with a list of the indices of the dropped samples
     :rtype: Tuple[pandas DataFrame, dict]
     """

diff --git a/protzilla/data_preprocessing/outlier_detection.py b/protzilla/data_preprocessing/outlier_detection.py
@@ -30,7 +30,7 @@ def by_isolation_forest(
         all kernels (-1)
     :type n_jobs: integer
 
-    :return: returns a Dataframe containing all samples that are not outliers and a\
+    :return: returns a Dataframe containing all samples that are not outliers and a
         dict with list of outlier sample names
     :rtype: Tuple[pandas DataFrame, dict]
     """
@@ -95,7 +95,7 @@ def by_local_outlier_factor(
         all kernels (-1)
     :type n_jobs: int
 
-    :return: returns a Dataframe containing all samples that are not outliers and a\
+    :return: returns a Dataframe containing all samples that are not outliers and a
         dict with list of outlier sample names
     :rtype: Tuple[pandas DataFrame, dict]
     """
@@ -154,9 +154,9 @@ def by_pca(
     :type number_of_components: integer (2 or 3)
     
     :return: returns a Dataframe containing all samples that are not outliers.
-        A dict with list of inlier sample names, a DataFrame that contains the projection \
-        of the intensity_df on first principal components, a list that contains the \
-        explained variation for each component and an int, the number of components \
+        A dict with list of inlier sample names, a DataFrame that contains the projection
+        of the intensity_df on first principal components, a list that contains the
+        explained variation for each component and an int, the number of components
         the calculations were executed with
     :rtype: Tuple[pandas DataFrame, dict]
     """

diff --git a/protzilla/data_preprocessing/peptide_filter.py b/protzilla/data_preprocessing/peptide_filter.py
@@ -14,7 +14,7 @@ def by_pep_value(
     :type intensity_df: pd.Dataframe
     :param peptide_df: the pandas dataframe containing the peptide information
     :type peptide_df: pd.Dataframe
-    :param threshold: peptides with a PEP-value below this threshold will be filtered\
+    :param threshold: peptides with a PEP-value below this threshold will be filtered
         out
     :type threshold: float
 

diff --git a/protzilla/data_preprocessing/plots.py b/protzilla/data_preprocessing/plots.py
@@ -123,10 +123,10 @@ def create_box_plots(
     (for example before and after filtering/normalisation) and creates
     a visualisation for each one.
 
-    :param dataframe_a: First dataframe in protzilla long format for\
+    :param dataframe_a: First dataframe in protzilla long format for
         first boxplot
     :type dataframe_a: pd.DataFrame
-    :param dataframe_b: Second dataframe in protzilla long format\
+    :param dataframe_b: Second dataframe in protzilla long format
         for second boxplot
     :type dataframe_b: pd.DataFrame
     :param name_a: Name of first boxplot
@@ -139,10 +139,10 @@ def create_box_plots(
     :type y_title: str
     :param x_title: Optional x-axis title for graphs.
     :type x_title: str
-    :param group_by: Optional argument to create a grouped boxplot\
-        graph. Arguments can be either "Sample" to group by sample or\
-        "Protein ID" to group by protein. Leave "None" to get ungrouped\
-        conventional graphs. If set the function will ignore the\
+    :param group_by: Optional argument to create a grouped boxplot
+        graph. Arguments can be either "Sample" to group by sample or
+        "Protein ID" to group by protein. Leave "None" to get ungrouped
+        conventional graphs. If set the function will ignore the
         graph_type argument. Default is "None".
     :type group_by: str
 
@@ -224,10 +224,10 @@ def create_histograms(
     (for example before and after filtering/normalisation) and creates
     a visualisation for each one.
 
-    :param dataframe_a: First dataframe in protzilla long format for\
+    :param dataframe_a: First dataframe in protzilla long format for
         first histogram
     :type dataframe_a: pd.DataFrame
-    :param dataframe_b: Second dataframe in protzilla long format\
+    :param dataframe_b: Second dataframe in protzilla long format
         for second histogram
     :type dataframe_b: pd.DataFrame
     :param name_a: Name of first histogram
@@ -290,7 +290,7 @@ def create_anomaly_score_bar_plot(
     This function creates a graph visualising the outlier
     and non-outlier samples using the anomaly score.
 
-    :param anomaly_df: pandas Dataframe that contains the anomaly score for each\
+    :param anomaly_df: pandas Dataframe that contains the anomaly score for each
         sample, including outlier and non-outlier samples
     :type anomaly_df: pd.DataFrame
     :param colour_outlier: hex code for colour depicting the outliers.
@@ -352,10 +352,10 @@ def create_pca_2d_scatter_plot(
     and non-outlier points by showing the principal components. It
     returns a Plotly Figure object.
 
-    :param pca_df: a DataFrame that contains the projection of\
+    :param pca_df: a DataFrame that contains the projection of
         the intensity_df on first principal components
     :type pca_df: pd.DataFrame
-    :param explained_variance_ratio: a list that contains the\
+    :param explained_variance_ratio: a list that contains the
         explained variation for each component
     :type explained_variance_ratio: list
     :param colour_outlier: hex code for colour depicting the outliers.
@@ -407,10 +407,10 @@ def create_pca_3d_scatter_plot(
     and non-outlier points by showing the principal components. It
     returns a Plotly Figure object.
 
-    :param pca_df: a DataFrame that contains the projection of\
+    :param pca_df: a DataFrame that contains the projection of
         the intensity_df on first principal components
     :type pca_df: pd.DataFrame
-    :param explained_variance_ratio: a list that contains the\
+    :param explained_variance_ratio: a list that contains the
         explained variation for each component
     :type explained_variance_ratio: list
     :param colour_outlier: hex code for colour depicting the outliers.

diff --git a/protzilla/data_preprocessing/transformation.py b/protzilla/data_preprocessing/transformation.py
@@ -12,11 +12,11 @@ def by_log(intensity_df: pd.DataFrame, log_base="log10"):
 
     :param intensity_df: a protein data frame in long format
     :type intensity_df: pd.DataFrame
-    :param log_base: String of the used log method "log10" (base 10)\
+    :param log_base: String of the used log method "log10" (base 10)
         or "log2" (base 2). Default: "log10"
     :type log_base: str
 
-    :return: returns a pandas DataFrame in typical protzilla\
+    :return: returns a pandas DataFrame in typical protzilla
         long format with the transformed data and an empty dict.
     :rtype: Tuple[pandas DataFrame, dict]
     """