diff --git a/protzilla/constants/workflow_meta.json b/protzilla/constants/workflow_meta.json index 9c0d6dc16..b7d3457b1 100644 --- a/protzilla/constants/workflow_meta.json +++ b/protzilla/constants/workflow_meta.json @@ -531,6 +531,15 @@ "Protein ID" ], "default": "None" + }, + "visual_transformation": { + "name": "Visual Transformation:", + "type": "categorical", + "categories": [ + "linear", + "log10" + ], + "default": "linear" } }, { @@ -578,6 +587,15 @@ "Protein ID" ], "default": "None" + }, + "visual_transformation": { + "name": "Visual Transformation:", + "type": "categorical", + "categories": [ + "linear", + "log10" + ], + "default": "linear" } }, { @@ -625,6 +643,15 @@ "Protein ID" ], "default": "None" + }, + "visual_transformation": { + "name": "Visual Transformation:", + "type": "categorical", + "categories": [ + "linear", + "log10" + ], + "default": "linear" } }, { @@ -675,6 +702,15 @@ "Protein ID" ], "default": "None" + }, + "visual_transformation": { + "name": "Visual Transformation:", + "type": "categorical", + "categories": [ + "linear", + "log10" + ], + "default": "linear" } }, { @@ -722,6 +758,15 @@ "Protein ID" ], "default": "None" + }, + "visual_transformation": { + "name": "Visual Transformation:", + "type": "categorical", + "categories": [ + "linear", + "log10" + ], + "default": "linear" } }, { @@ -785,6 +830,15 @@ "Protein ID" ], "default": "None" + }, + "visual_transformation": { + "name": "Visual Transformation:", + "type": "categorical", + "categories": [ + "linear", + "log10" + ], + "default": "linear" } }, { diff --git a/protzilla/data_preprocessing/imputation.py b/protzilla/data_preprocessing/imputation.py index d739ebb3c..90d95ea06 100644 --- a/protzilla/data_preprocessing/imputation.py +++ b/protzilla/data_preprocessing/imputation.py @@ -328,50 +328,116 @@ def by_normal_distribution_sampling( def by_knn_plot( - df, result_df, current_out, graph_type, graph_type_quantities, group_by + df, + result_df, + current_out, + graph_type, + 
graph_type_quantities, + group_by, + visual_transformation, ): return _build_box_hist_plot( - df, result_df, graph_type, graph_type_quantities, group_by + df, + result_df, + graph_type, + graph_type_quantities, + group_by, + visual_transformation, ) def by_normal_distribution_sampling_plot( - df, result_df, current_out, graph_type, graph_type_quantities, group_by + df, + result_df, + current_out, + graph_type, + graph_type_quantities, + group_by, + visual_transformation, ): return _build_box_hist_plot( - df, result_df, graph_type, graph_type_quantities, group_by + df, + result_df, + graph_type, + graph_type_quantities, + group_by, + visual_transformation, ) def by_simple_imputer_plot( - df, result_df, current_out, graph_type, graph_type_quantities, group_by + df, + result_df, + current_out, + graph_type, + graph_type_quantities, + group_by, + visual_transformation, ): return _build_box_hist_plot( - df, result_df, graph_type, graph_type_quantities, group_by + df, + result_df, + graph_type, + graph_type_quantities, + group_by, + visual_transformation, ) def by_min_per_sample_plot( - df, result_df, current_out, graph_type, graph_type_quantities, group_by + df, + result_df, + current_out, + graph_type, + graph_type_quantities, + group_by, + visual_transformation, ): return _build_box_hist_plot( - df, result_df, graph_type, graph_type_quantities, group_by + df, + result_df, + graph_type, + graph_type_quantities, + group_by, + visual_transformation, ) def by_min_per_protein_plot( - df, result_df, current_out, graph_type, graph_type_quantities, group_by + df, + result_df, + current_out, + graph_type, + graph_type_quantities, + group_by, + visual_transformation, ): return _build_box_hist_plot( - df, result_df, graph_type, graph_type_quantities, group_by + df, + result_df, + graph_type, + graph_type_quantities, + group_by, + visual_transformation, ) def by_min_per_dataset_plot( - df, result_df, current_out, graph_type, graph_type_quantities, group_by + df, + result_df, + 
current_out, + graph_type, + graph_type_quantities, + group_by, + visual_transformation, ): return _build_box_hist_plot( - df, result_df, graph_type, graph_type_quantities, group_by + df, + result_df, + graph_type, + graph_type_quantities, + group_by, + visual_transformation, ) @@ -380,7 +446,12 @@ def number_of_imputed_values(input_df, result_df): def _build_box_hist_plot( - df, result_df, graph_type, graph_type_quantities, group_by + df: pd.DataFrame, + result_df: pd.DataFrame, + graph_type: str = "Boxplot", + graph_type_quantities: str = "Pie chart", + group_by: str = "None", + visual_transformation: str = "linear", ) -> list[Figure]: """ This function creates two visualisations: @@ -393,22 +464,39 @@ def _build_box_hist_plot( 2. a graph summarising the amount of filtered proteins. """ + + intensity_name_df = df.columns[3] + intensity_name_result_df = result_df.columns[3] + + imputed_df = result_df.copy() + + imputed_df[intensity_name_result_df] = list( + map( + lambda x, y: y if np.isnan(x) else np.nan, + df[intensity_name_df], + result_df[intensity_name_result_df], + ) + ) + if graph_type == "Boxplot": fig1 = create_box_plots( dataframe_a=df, - dataframe_b=result_df, - name_a="Before Imputation", - name_b="After Imputation", + dataframe_b=imputed_df, + name_a="Original Values", + name_b="Imputed Values", heading="Distribution of Protein Intensities", group_by=group_by, + visual_transformation=visual_transformation, ) elif graph_type == "Histogram": fig1 = create_histograms( dataframe_a=df, - dataframe_b=result_df, - name_a="Before Imputation", - name_b="After Imputation", + dataframe_b=imputed_df, + name_a="Original Values", + name_b="Imputed Values", heading="Distribution of Protein Intensities", + visual_transformation=visual_transformation, + overlay=True, ) values_of_sectors = [ diff --git a/protzilla/data_preprocessing/plots.py b/protzilla/data_preprocessing/plots.py index a4cfcb411..c53919bf8 100644 --- a/protzilla/data_preprocessing/plots.py +++ 
b/protzilla/data_preprocessing/plots.py @@ -1,9 +1,13 @@ import pandas as pd +import numpy as np +import math import plotly.express as px import plotly.graph_objects as go from plotly.graph_objects import Figure from plotly.subplots import make_subplots +from protzilla.data_preprocessing.plots_helper import generate_tics + from ..constants.colors import ( PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE, PROTZILLA_DISCRETE_COLOR_SEQUENCE, @@ -13,7 +17,7 @@ def create_pie_plot( names_of_sectors: "list[str]", values_of_sectors: "list[int]", - heading="", + heading: str = "", ) -> Figure: """ Function to create generic pie graph from data. @@ -21,13 +25,9 @@ def create_pie_plot( a whole. :param names_of_sectors: Name of parts (so-called sectors) or categories - :type names_of_sectors: list[str] :param values_of_sectors: Corresponding values for sectors - :type values_of_sectors: list[str] :param heading: Header for the graph - for example the topic - :type heading: str :return: returns a pie chart of the data - :rtype: Figure (plotly object) """ if any(i < 0 for i in values_of_sectors): raise ValueError @@ -55,10 +55,10 @@ def create_pie_plot( def create_bar_plot( names_of_sectors: "list[str]", values_of_sectors: "list[int]", - heading="", + heading: str = "", colour: "list[str]" = PROTZILLA_DISCRETE_COLOR_SEQUENCE, - y_title="", - x_title="", + y_title: str = "", + x_title: str = "", ) -> Figure: """ Function to create generic bar graph from data. @@ -66,17 +66,11 @@ def create_bar_plot( a whole. :param names_of_sectors: Name of parts (so called sectors) or categories - :type names_of_sectors: list[str] :param values_of_sectors: Corresponding values for sectors - :type values_of_sectors: list[str] :param heading: Header for the graph - for example the topic - :type heading: str :param y_title: Optional y-axis title. - :type y_title: str :param x_title: Optional x-axis title. 
- :type x_title: str :return: returns a bar chart of the data - :rtype: Figure (plotly object) """ fig = px.bar( @@ -110,12 +104,13 @@ def create_bar_plot( def create_box_plots( dataframe_a: pd.DataFrame, dataframe_b: pd.DataFrame, - name_a="", - name_b="", - heading="", - y_title="", - x_title="", + name_a: str = "", + name_b: str = "", + heading: str = "", + y_title: str = "", + x_title: str = "", group_by: str = "None", + visual_transformation: str = "linear", ) -> Figure: """ A function to create a boxplot for visualisation @@ -123,31 +118,23 @@ def create_box_plots( (for example before and after filtering/normalisation) and creates a visualisation for each one. - :param dataframe_a: First dataframe in protzilla long format for - first boxplot - :type dataframe_a: pd.DataFrame - :param dataframe_b: Second dataframe in protzilla long format - for second boxplot - :type dataframe_b: pd.DataFrame + :param dataframe_a: First dataframe in protzilla long format for\ + first boxplot + :param dataframe_b: Second dataframe in protzilla long format\ + for second boxplot + :param name_a: Name of first boxplot - :type name_a: str :param name_b: Name of second boxplot - :type name_b: str :param heading: Header or title for the graph (optional) - :type heading: str :param y_title: Optional y-axis title for graphs. - :type y_title: str :param x_title: Optional x-axis title for graphs. - :type x_title: str - :param group_by: Optional argument to create a grouped boxplot - graph. Arguments can be either "Sample" to group by sample or - "Protein ID" to group by protein. Leave "None" to get ungrouped - conventional graphs. If set the function will ignore the - graph_type argument. Default is "None". - :type group_by: str - + :param group_by: Optional argument to create a grouped boxplot\ + :param visual_transformation: Visual transformation of the y-axis data. + graph. Arguments can be either "Sample" to group by sample or\ + "Protein ID" to group by protein. 
Leave "None" to get ungrouped\ + conventional graphs. If set the function will ignore the\ + graph_type argument. Default is "None". :return: returns a boxplot of the data - :rtype: Figure (plotly object) """ if group_by not in {"None", "Sample", "Protein ID"}: raise ValueError( @@ -205,6 +192,8 @@ def create_box_plots( "yanchor": "top", }, ) + if visual_transformation == "log10": + fig.update_yaxes(type="log") fig.update_yaxes(rangemode="tozero") return fig @@ -212,11 +201,13 @@ def create_box_plots( def create_histograms( dataframe_a: pd.DataFrame, dataframe_b: pd.DataFrame, - name_a="", - name_b="", - heading="", - y_title="", - x_title="", + name_a: str = "", + name_b: str = "", + heading: str = "", + y_title: str = "", + x_title: str = "", + visual_transformation: str = "linear", + overlay: bool = False, ) -> Figure: """ A function to create a histogram for visualisation @@ -224,42 +215,79 @@ def create_histograms( (for example before and after filtering/normalisation) and creates a visualisation for each one. - :param dataframe_a: First dataframe in protzilla long format for - first histogram - :type dataframe_a: pd.DataFrame - :param dataframe_b: Second dataframe in protzilla long format - for second histogram - :type dataframe_b: pd.DataFrame + :param dataframe_a: First dataframe in protzilla long format for\ + first histogram + :param dataframe_b: Second dataframe in protzilla long format\ + for second histogram + :param name_a: Name of first histogram - :type name_a: str :param name_b: Name of second histogram - :type name_b: str :param heading: Header or title for the graph (optional) - :type heading: str :param y_title: Optional y axis title for graphs. - :type y_title: str :param x_title: Optional x axis title for graphs. - :type x_title: str + :param overlay: Specifies whether to draw one Histogram with overlay or two separate histograms + :param visual_transformation: Visual transformation of the y-axis data. 
:return: returns a pie or bar chart of the data - :rtype: Figure (plotly object) """ + if visual_transformation not in {"linear", "log10"}: + raise ValueError( + f"""visual_transformation parameter must be "linear" or + "log10" but is {visual_transformation}""" + ) + intensity_name_a = dataframe_a.columns[3] intensity_name_b = dataframe_b.columns[3] - fig = make_subplots(rows=1, cols=2) + + intensities_a = dataframe_a[intensity_name_a] + intensities_b = dataframe_b[intensity_name_b] + + if visual_transformation == "log10": + intensities_a = intensities_a.apply(np.log10) + intensities_b = intensities_b.apply(np.log10) + + min_value = min(intensities_a.min(skipna=True), intensities_b.min(skipna=True)) + max_value = max(intensities_a.max(skipna=True), intensities_b.max(skipna=True)) + + binsize_factor = 0.0005 if visual_transformation == "linear" else 0.02 + trace0 = go.Histogram( - x=dataframe_a[intensity_name_a], + x=intensities_a, marker_color=PROTZILLA_DISCRETE_COLOR_SEQUENCE[0], name=name_a, + xbins=dict( + start=min_value, + end=max_value, + size=(max_value - min_value) * binsize_factor, + ), ) trace1 = go.Histogram( - x=dataframe_b[intensity_name_b], + x=intensities_b, marker_color=PROTZILLA_DISCRETE_COLOR_SEQUENCE[1], name=name_b, + xbins=dict( + start=min_value, + end=max_value, + size=(max_value - min_value) * binsize_factor, + ), ) - fig.add_trace(trace0, 1, 1) - fig.add_trace(trace1, 1, 2) - fig.update_layout(bargap=0.2) + if not overlay: + fig = make_subplots(rows=1, cols=2) + fig.add_trace(trace0, 1, 1) + fig.add_trace(trace1, 1, 2) + if visual_transformation == "log10": + fig.update_layout( + xaxis=generate_tics(0, max_value, True), + xaxis2=generate_tics(0, max_value, True), + ) + else: + fig = go.Figure() + fig.add_trace(trace0) + fig.add_trace(trace1) + fig.update_layout(barmode="overlay") + fig.update_traces(opacity=0.75) + if visual_transformation == "log10": + fig.update_layout(xaxis=generate_tics(0, max_value, True)) fig.update_layout( 
xaxis_title=x_title, @@ -278,31 +306,27 @@ def create_histograms( }, ) fig.update_yaxes(rangemode="tozero") + return fig def create_anomaly_score_bar_plot( - anomaly_df, - colour_outlier=PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE[1], - colour_non_outlier=PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE[0], + anomaly_df: pd.DataFrame, + colour_outlier: str = PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE[1], + colour_non_outlier: str = PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE[0], ) -> Figure: """ This function creates a graph visualising the outlier and non-outlier samples using the anomaly score. - :param anomaly_df: pandas Dataframe that contains the anomaly score for each - sample, including outliers and on-outliers samples - :type anomaly_df: pd.DataFrame + :param anomaly_df: pandas Dataframe that contains the anomaly score for each\ + sample, including outliers and on-outliers samples :param colour_outlier: hex code for colour depicting the outliers. - Default: PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE outlier colour - :type colour_outlier: str + Default: PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE outlier colour :param colour_non_outlier: hex code for colour depicting the - non-outliers. Default: PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE - non-outlier colour - :type colour_non_outlier: str - + non-outliers. 
Default: PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE + non-outlier colour :return: returns a plotly Figure object - :rtype: Figure (plotly object) """ fig = px.bar( @@ -342,32 +366,27 @@ def create_anomaly_score_bar_plot( def create_pca_2d_scatter_plot( - pca_df, - explained_variance_ratio, - colour_outlier=PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE[1], - colour_non_outlier=PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE[0], + pca_df: pd.DataFrame, + explained_variance_ratio: list, + colour_outlier: str = PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE[1], + colour_non_outlier: str = PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE[0], ) -> Figure: """ This function creates a graph visualising the outlier and non-outlier points by showing the principal components. It returns a ploty Figure object. - :param pca_df: a DataFrame that contains the projection of - the intensity_df on first principal components - :type pca_df: pd.DataFrame - :param explained_variance_ratio: a list that contains the - explained variation for each component - :type explained_variance_ratio: list + :param pca_df: a DataFrame that contains the projection of\ + the intensity_df on first principal components + :param explained_variance_ratio: a list that contains the\ + explained variation for each component :param colour_outlier: hex code for colour depicting the outliers. - Default: PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE outlier colour - :type colour_outlier: str + Default: PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE outlier colour :param colour_non_outlier: hex code for colour depicting the - non-outliers. Default: PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE - non-outlier colour - :type colour_non_outlier: str + non-outliers. 
Default: PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE + non-outlier colour :return: returns a plotly Figure object - :rtype: Figure (plotly object) """ fig = go.Figure( data=go.Scatter( @@ -397,32 +416,27 @@ def create_pca_2d_scatter_plot( def create_pca_3d_scatter_plot( - pca_df, - explained_variance_ratio, - colour_outlier=PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE[1], - colour_non_outlier=PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE[0], + pca_df: pd.DataFrame, + explained_variance_ratio: list, + colour_outlier: str = PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE[1], + colour_non_outlier: str = PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE[0], ) -> Figure: """ This function creates a graph visualising the outlier and non-outlier points by showing the principal components. It returns a ploty Figure object. - :param pca_df: a DataFrame that contains the projection of - the intensity_df on first principal components - :type pca_df: pd.DataFrame - :param explained_variance_ratio: a list that contains the - explained variation for each component - :type explained_variance_ratio: list + :param pca_df: a DataFrame that contains the projection of\ + the intensity_df on first principal components + :param explained_variance_ratio: a list that contains the\ + explained variation for each component :param colour_outlier: hex code for colour depicting the outliers. - Default: PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE outlier colour - :type colour_outlier: str + Default: PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE outlier colour :param colour_non_outlier: hex code for colour depicting the - non-outliers. Default: PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE - non-outlier colour - :type colour_non_outlier: str + non-outliers. 
import math

import numpy as np

# Short-scale suffix for every power of 1000. Each entry is unique so that two
# different magnitudes can never end up with the same label on one axis.
_MILL_SUFFIXES = ("", "K", "M", "B", "T", "Qa", "Qi", "Sx", "Sp", "O", "N")


def generate_tics(lower_bound, upper_bound, log: bool):
    """
    Generates a dictionary with equally spaced tick positions and their labels
    for the interval between lower_bound and upper_bound.

    The step size is a power of ten derived from half the order of magnitude
    of the interval width. Three extra steps are appended past the upper bound.

    :param lower_bound: lower bound of the interval to create labels for
    :param upper_bound: upper bound of the interval to create labels for
    :param log: specifies whether the scale is logarithmic; if so, a tick at
        position x is labelled with the shortened form of 10 to the power x
    :return: a dictionary with the keys "tickmode", "tickvals" (numpy array of
        positions) and "ticktext" (numpy array of label strings)
    :raises ValueError: if a bound is not finite or upper_bound is smaller
        than lower_bound
    """
    span = upper_bound - lower_bound
    if not math.isfinite(span):
        raise ValueError(
            f"tick bounds must be finite but are {lower_bound} and {upper_bound}"
        )
    if span < 0:
        raise ValueError(
            f"upper_bound ({upper_bound}) must not be smaller than "
            f"lower_bound ({lower_bound})"
        )

    # log10(0) is undefined, so an empty interval falls back to a step of 1
    exponent = math.floor(math.log10(span) / 2) if span > 0 else 0
    step_size = 10**exponent
    first_step = math.ceil(lower_bound / step_size) * step_size
    last_step = math.ceil(upper_bound / step_size) * step_size + 3 * step_size
    tickvals = np.arange(first_step, last_step, step_size)

    if log:
        # float arithmetic: numpy integers cannot be raised to negative powers
        labels = [millify(10.0 ** float(value)) for value in tickvals]
    else:
        labels = [millify(value) for value in tickvals]

    return {
        "tickmode": "array",
        "tickvals": tickvals,
        "ticktext": np.array(labels, dtype=str),
    }


def millify(n) -> str:
    """
    Writes the number n in shortened style with a shorthand symbol for every
    power of 1000. The number is rounded to zero decimal places.

    :param n: the number to be written in shortened style
    :return: a string containing the shortened number; "inf", "-inf" or "nan"
        for non-finite input
    """
    n = float(n)
    if not math.isfinite(n):
        return str(n)

    last_idx = len(_MILL_SUFFIXES) - 1
    magnitude = 0 if n == 0 else int(math.floor(math.log10(abs(n)) / 3))
    millidx = max(0, min(last_idx, magnitude))
    scaled = n / 10 ** (3 * millidx)

    # Rounding can push the mantissa up to 1000 (e.g. 999.9); move on to the
    # next suffix so the label reads "1K" instead of "1000".
    if abs(round(scaled)) >= 1000 and millidx < last_idx:
        millidx += 1
        scaled = n / 10 ** (3 * millidx)

    return f"{scaled:.0f}{_MILL_SUFFIXES[millidx]}"
b/tests/protzilla/data_preprocessing/test_imputation.py index 40caba91a..56ed695df 100644 --- a/tests/protzilla/data_preprocessing/test_imputation.py +++ b/tests/protzilla/data_preprocessing/test_imputation.py @@ -155,7 +155,13 @@ def test_imputation_min_value_per_df( result_df = by_min_per_dataset(input_imputation_df, shrinking_value=0.1)[0] fig1, fig2 = by_min_per_dataset_plot( - input_imputation_df, result_df, {}, "Boxplot", "Bar chart", "Sample" + input_imputation_df, + result_df, + {}, + "Boxplot", + "Bar chart", + "Sample", + "linear", ) if show_figures: fig1.show() @@ -180,7 +186,13 @@ def test_imputation_min_value_per_sample( result_df = by_min_per_sample(input_imputation_df, shrinking_value=0.2)[0] fig1, fig2 = by_min_per_sample_plot( - input_imputation_df, result_df, {}, "Boxplot", "Bar chart", "Sample" + input_imputation_df, + result_df, + {}, + "Boxplot", + "Bar chart", + "Sample", + "linear", ) if show_figures: fig1.show() @@ -205,7 +217,13 @@ def test_imputation_min_value_per_protein( result_df = by_min_per_protein(input_imputation_df, shrinking_value=1.0)[0] fig1, fig2 = by_min_per_protein_plot( - input_imputation_df, result_df, {}, "Boxplot", "Bar chart", "Sample" + input_imputation_df, + result_df, + {}, + "Boxplot", + "Bar chart", + "Sample", + "linear", ) if show_figures: fig1.show() @@ -233,7 +251,13 @@ def test_imputation_mean_per_protein( )[0] fig1, fig2 = by_simple_imputer_plot( - input_imputation_df, result_df, {}, "Boxplot", "Bar chart", "Sample" + input_imputation_df, + result_df, + {}, + "Boxplot", + "Bar chart", + "Sample", + "linear", ) if show_figures: fig1.show() @@ -259,7 +283,13 @@ def test_imputation_knn(show_figures, input_imputation_df, assertion_df_knn): )[0] fig1, fig2 = by_knn_plot( - input_imputation_df, result_df, {}, "Boxplot", "Bar chart", "Sample" + input_imputation_df, + result_df, + {}, + "Boxplot", + "Bar chart", + "Sample", + "linear", ) if show_figures: fig1.show() @@ -285,7 +315,13 @@ def 
test_imputation_normal_distribution_sampling(show_figures, input_imputation_ )[0] fig1, fig2 = by_normal_distribution_sampling_plot( - input_imputation_df, result_df_perProtein, {}, "Boxplot", "Bar chart", "Sample" + input_imputation_df, + result_df_perProtein, + {}, + "Boxplot", + "Bar chart", + "Sample", + "linear", ) if show_figures: fig1.show() diff --git a/tests/protzilla/data_preprocessing/test_plots_data_preprocessing.py b/tests/protzilla/data_preprocessing/test_plots_data_preprocessing.py index 164779058..4c94b75eb 100644 --- a/tests/protzilla/data_preprocessing/test_plots_data_preprocessing.py +++ b/tests/protzilla/data_preprocessing/test_plots_data_preprocessing.py @@ -80,8 +80,20 @@ def test_create_histograms( if show_figures: fig.show() + fig = create_histograms( + dataframe_a=input_imputation_df, + dataframe_b=assertion_df_knn, + name_a="input_imputation_df", + name_b="assertion_df_knn", + heading="heading", + overlay=True, + ) + if show_figures: + fig.show() + # should throw Value Error with pytest.raises(ValueError): + # TODO: 304 create_box_plots( dataframe_a=input_imputation_df, dataframe_b=assertion_df_knn, @@ -103,7 +115,12 @@ def test_build_box_hist_plot( show_figures, input_imputation_df, assertion_df_knn, assertion_df_min_value_per_df ): fig1, fig2 = imputation._build_box_hist_plot( - input_imputation_df, assertion_df_knn, "Boxplot", "Bar chart", "Sample" + input_imputation_df, + assertion_df_knn, + "Boxplot", + "Bar chart", + "Sample", + "linear", ) fig3, fig4 = imputation._build_box_hist_plot( input_imputation_df, @@ -111,6 +128,7 @@ def test_build_box_hist_plot( "Histogram", "Pie chart", "Protein ID", + "linear", ) if show_figures: diff --git a/tests/protzilla/test_run.py b/tests/protzilla/test_run.py index 436a8ca05..b2b6a99e5 100644 --- a/tests/protzilla/test_run.py +++ b/tests/protzilla/test_run.py @@ -252,7 +252,10 @@ def test_export_plot(tests_folder_name): run.create_plot( data_preprocessing.imputation.by_min_per_sample_plot, dict( - 
graph_type="Boxplot", graph_type_quantities="Bar chart", group_by="Sample" + graph_type="Boxplot", + graph_type_quantities="Bar chart", + group_by="Sample", + visual_transformation="linear", ), ) assert len(run.plots) > 1 diff --git a/user_data/workflows/standard.json b/user_data/workflows/standard.json index eec360472..74a455964 100644 --- a/user_data/workflows/standard.json +++ b/user_data/workflows/standard.json @@ -54,7 +54,8 @@ "graphs": [ { "graph_type": "Boxplot", - "group_by": "Sample" + "group_by": "Sample", + "visual_transformation": "log10" }, { "graph_type_quantities": "Pie chart"