fix linting issues

cschlaffner · Nov 22, 2023 · c4f8d2d
1 parent 8b1e063
commit c4f8d2d
Showing 1 changed file with 97 additions and 28 deletions.
diff --git a/protzilla/data_preprocessing/imputation.py b/protzilla/data_preprocessing/imputation.py
@@ -13,9 +13,9 @@
 
 
 def by_knn(
-        intensity_df: pd.DataFrame,
-        number_of_neighbours=5,
-        **kwargs  # quantile, default is median
+    intensity_df: pd.DataFrame,
+    number_of_neighbours=5,
+    **kwargs  # quantile, default is median
 ) -> tuple[pd.DataFrame, dict]:
     """
     A function to perform value imputation based on KNN
@@ -59,8 +59,8 @@ def by_knn(
 
 
 def by_simple_imputer(
-        intensity_df: pd.DataFrame,
-        strategy="mean",
+    intensity_df: pd.DataFrame,
+    strategy="mean",
 ) -> tuple[pd.DataFrame, dict]:
     """
     A function to perform protein-wise imputations
@@ -101,8 +101,8 @@ def by_simple_imputer(
 
 
 def by_min_per_sample(
-        intensity_df: pd.DataFrame,
-        shrinking_value=1,
+    intensity_df: pd.DataFrame,
+    shrinking_value=1,
 ) -> tuple[pd.DataFrame, dict]:
     """
     A function to perform  minimal value imputation on the level
@@ -145,8 +145,8 @@ def by_min_per_sample(
 
 
 def by_min_per_protein(
-        intensity_df: pd.DataFrame,
-        shrinking_value=1,
+    intensity_df: pd.DataFrame,
+    shrinking_value=1,
 ) -> tuple[pd.DataFrame, dict]:
     """
     A function to impute missing values for each protein
@@ -190,8 +190,8 @@ def by_min_per_protein(
 
 
 def by_min_per_dataset(
-        intensity_df: pd.DataFrame,
-        shrinking_value=1,
+    intensity_df: pd.DataFrame,
+    shrinking_value=1,
 ) -> tuple[pd.DataFrame, dict]:
     """
     A function to impute missing values for each protein
@@ -262,7 +262,6 @@ def by_normal_distribution_sampling(
         transformed_df = long_to_wide(intensity_df)
         # iterate over all protein groups
         for protein_grp in transformed_df.columns:
-
             number_of_nans = transformed_df[protein_grp].isnull().sum()
 
             # don't impute values if there not enough values (> 1) to sample from
@@ -327,50 +326,116 @@ def by_normal_distribution_sampling(
 
 
 def by_knn_plot(
-        df, result_df, current_out, graph_type, graph_type_quantities, group_by, visual_transformation
+    df,
+    result_df,
+    current_out,
+    graph_type,
+    graph_type_quantities,
+    group_by,
+    visual_transformation,
 ):
     return _build_box_hist_plot(
-        df, result_df, graph_type, graph_type_quantities, group_by, visual_transformation
+        df,
+        result_df,
+        graph_type,
+        graph_type_quantities,
+        group_by,
+        visual_transformation,
     )
 
 
 def by_normal_distribution_sampling_plot(
-    df, result_df, current_out, graph_type, graph_type_quantities, group_by, visual_transformation
+    df,
+    result_df,
+    current_out,
+    graph_type,
+    graph_type_quantities,
+    group_by,
+    visual_transformation,
 ):
     return _build_box_hist_plot(
-        df, result_df, graph_type, graph_type_quantities, group_by, visual_transformation
+        df,
+        result_df,
+        graph_type,
+        graph_type_quantities,
+        group_by,
+        visual_transformation,
     )
 
 
 def by_simple_imputer_plot(
-        df, result_df, current_out, graph_type, graph_type_quantities, group_by, visual_transformation
+    df,
+    result_df,
+    current_out,
+    graph_type,
+    graph_type_quantities,
+    group_by,
+    visual_transformation,
 ):
     return _build_box_hist_plot(
-        df, result_df, graph_type, graph_type_quantities, group_by, visual_transformation
+        df,
+        result_df,
+        graph_type,
+        graph_type_quantities,
+        group_by,
+        visual_transformation,
     )
 
 
 def by_min_per_sample_plot(
-        df, result_df, current_out, graph_type, graph_type_quantities, group_by, visual_transformation
+    df,
+    result_df,
+    current_out,
+    graph_type,
+    graph_type_quantities,
+    group_by,
+    visual_transformation,
 ):
     return _build_box_hist_plot(
-        df, result_df, graph_type, graph_type_quantities, group_by, visual_transformation
+        df,
+        result_df,
+        graph_type,
+        graph_type_quantities,
+        group_by,
+        visual_transformation,
     )
 
 
 def by_min_per_protein_plot(
-        df, result_df, current_out, graph_type, graph_type_quantities, group_by, visual_transformation
+    df,
+    result_df,
+    current_out,
+    graph_type,
+    graph_type_quantities,
+    group_by,
+    visual_transformation,
 ):
     return _build_box_hist_plot(
-        df, result_df, graph_type, graph_type_quantities, group_by, visual_transformation
+        df,
+        result_df,
+        graph_type,
+        graph_type_quantities,
+        group_by,
+        visual_transformation,
     )
 
 
 def by_min_per_dataset_plot(
-        df, result_df, current_out, graph_type, graph_type_quantities, group_by, visual_transformation
+    df,
+    result_df,
+    current_out,
+    graph_type,
+    graph_type_quantities,
+    group_by,
+    visual_transformation,
 ):
     return _build_box_hist_plot(
-        df, result_df, graph_type, graph_type_quantities, group_by, visual_transformation
+        df,
+        result_df,
+        graph_type,
+        graph_type_quantities,
+        group_by,
+        visual_transformation,
     )
 
 
@@ -379,7 +444,7 @@ def number_of_imputed_values(input_df, result_df):
 
 
 def _build_box_hist_plot(
-        df, result_df, graph_type, graph_type_quantities, group_by, visual_transformation
+    df, result_df, graph_type, graph_type_quantities, group_by, visual_transformation
 ) -> list[Figure]:
     """
     This function creates two visualisations:
@@ -398,9 +463,13 @@ def _build_box_hist_plot(
 
     imputed_df = result_df.copy()
 
-    imputed_df[intensity_name_result_df] = list(map(lambda x, y:
-                       y if np.isnan(x) else np.nan,
-                       df[intensity_name_df], result_df[intensity_name_result_df]))
+    imputed_df[intensity_name_result_df] = list(
+        map(
+            lambda x, y: y if np.isnan(x) else np.nan,
+            df[intensity_name_df],
+            result_df[intensity_name_result_df],
+        )
+    )
 
     if graph_type == "Boxplot":
         fig1 = create_box_plots(