Skip to content

Commit

Permalink
fix linting issues
Browse files: browse the repository at this point in the history
  • Loading branch information
selenabr committed Nov 22, 2023
1 parent 8b1e063 commit c4f8d2d
Showing 1 changed file with 97 additions and 28 deletions.
125 changes: 97 additions & 28 deletions protzilla/data_preprocessing/imputation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@


def by_knn(
intensity_df: pd.DataFrame,
number_of_neighbours=5,
**kwargs # quantile, default is median
intensity_df: pd.DataFrame,
number_of_neighbours=5,
**kwargs # quantile, default is median
) -> tuple[pd.DataFrame, dict]:
"""
A function to perform value imputation based on KNN
Expand Down Expand Up @@ -59,8 +59,8 @@ def by_knn(


def by_simple_imputer(
intensity_df: pd.DataFrame,
strategy="mean",
intensity_df: pd.DataFrame,
strategy="mean",
) -> tuple[pd.DataFrame, dict]:
"""
A function to perform protein-wise imputations
Expand Down Expand Up @@ -101,8 +101,8 @@ def by_simple_imputer(


def by_min_per_sample(
intensity_df: pd.DataFrame,
shrinking_value=1,
intensity_df: pd.DataFrame,
shrinking_value=1,
) -> tuple[pd.DataFrame, dict]:
"""
A function to perform minimal value imputation on the level
Expand Down Expand Up @@ -145,8 +145,8 @@ def by_min_per_sample(


def by_min_per_protein(
intensity_df: pd.DataFrame,
shrinking_value=1,
intensity_df: pd.DataFrame,
shrinking_value=1,
) -> tuple[pd.DataFrame, dict]:
"""
A function to impute missing values for each protein
Expand Down Expand Up @@ -190,8 +190,8 @@ def by_min_per_protein(


def by_min_per_dataset(
intensity_df: pd.DataFrame,
shrinking_value=1,
intensity_df: pd.DataFrame,
shrinking_value=1,
) -> tuple[pd.DataFrame, dict]:
"""
A function to impute missing values for each protein
Expand Down Expand Up @@ -262,7 +262,6 @@ def by_normal_distribution_sampling(
transformed_df = long_to_wide(intensity_df)
# iterate over all protein groups
for protein_grp in transformed_df.columns:

number_of_nans = transformed_df[protein_grp].isnull().sum()

# don't impute values if there are not enough values (> 1) to sample from
Expand Down Expand Up @@ -327,50 +326,116 @@ def by_normal_distribution_sampling(


def by_knn_plot(
df, result_df, current_out, graph_type, graph_type_quantities, group_by, visual_transformation
df,
result_df,
current_out,
graph_type,
graph_type_quantities,
group_by,
visual_transformation,
):
return _build_box_hist_plot(
df, result_df, graph_type, graph_type_quantities, group_by, visual_transformation
df,
result_df,
graph_type,
graph_type_quantities,
group_by,
visual_transformation,
)


def by_normal_distribution_sampling_plot(
df, result_df, current_out, graph_type, graph_type_quantities, group_by, visual_transformation
df,
result_df,
current_out,
graph_type,
graph_type_quantities,
group_by,
visual_transformation,
):
return _build_box_hist_plot(
df, result_df, graph_type, graph_type_quantities, group_by, visual_transformation
df,
result_df,
graph_type,
graph_type_quantities,
group_by,
visual_transformation,
)


def by_simple_imputer_plot(
df, result_df, current_out, graph_type, graph_type_quantities, group_by, visual_transformation
df,
result_df,
current_out,
graph_type,
graph_type_quantities,
group_by,
visual_transformation,
):
return _build_box_hist_plot(
df, result_df, graph_type, graph_type_quantities, group_by, visual_transformation
df,
result_df,
graph_type,
graph_type_quantities,
group_by,
visual_transformation,
)


def by_min_per_sample_plot(
df, result_df, current_out, graph_type, graph_type_quantities, group_by, visual_transformation
df,
result_df,
current_out,
graph_type,
graph_type_quantities,
group_by,
visual_transformation,
):
return _build_box_hist_plot(
df, result_df, graph_type, graph_type_quantities, group_by, visual_transformation
df,
result_df,
graph_type,
graph_type_quantities,
group_by,
visual_transformation,
)


def by_min_per_protein_plot(
df, result_df, current_out, graph_type, graph_type_quantities, group_by, visual_transformation
df,
result_df,
current_out,
graph_type,
graph_type_quantities,
group_by,
visual_transformation,
):
return _build_box_hist_plot(
df, result_df, graph_type, graph_type_quantities, group_by, visual_transformation
df,
result_df,
graph_type,
graph_type_quantities,
group_by,
visual_transformation,
)


def by_min_per_dataset_plot(
df, result_df, current_out, graph_type, graph_type_quantities, group_by, visual_transformation
df,
result_df,
current_out,
graph_type,
graph_type_quantities,
group_by,
visual_transformation,
):
return _build_box_hist_plot(
df, result_df, graph_type, graph_type_quantities, group_by, visual_transformation
df,
result_df,
graph_type,
graph_type_quantities,
group_by,
visual_transformation,
)


Expand All @@ -379,7 +444,7 @@ def number_of_imputed_values(input_df, result_df):


def _build_box_hist_plot(
df, result_df, graph_type, graph_type_quantities, group_by, visual_transformation
df, result_df, graph_type, graph_type_quantities, group_by, visual_transformation
) -> list[Figure]:
"""
This function creates two visualisations:
Expand All @@ -398,9 +463,13 @@ def _build_box_hist_plot(

imputed_df = result_df.copy()

imputed_df[intensity_name_result_df] = list(map(lambda x, y:
y if np.isnan(x) else np.nan,
df[intensity_name_df], result_df[intensity_name_result_df]))
imputed_df[intensity_name_result_df] = list(
map(
lambda x, y: y if np.isnan(x) else np.nan,
df[intensity_name_df],
result_df[intensity_name_result_df],
)
)

if graph_type == "Boxplot":
fig1 = create_box_plots(
Expand Down

0 comments on commit c4f8d2d

Please sign in to comment.