-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
298 histogram of imputed and original values #305
Changes from 16 commits
ec0fcbd
464f24d
49d86de
c0b7fe4
b91766c
ccc1401
798ff49
2e641d5
eeb4bd1
8b1e063
c4f8d2d
d7f72bd
224c226
5fc9478
d472ebd
8497496
2a212f7
3b12fb2
735e34d
c8109cf
f410094
ae09aae
5b025d2
2808a63
40c52c7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,12 @@ | ||
import numpy as np | ||
import pandas as pd | ||
import plotly.express as px | ||
import plotly.graph_objects as go | ||
from plotly.graph_objects import Figure | ||
from plotly.subplots import make_subplots | ||
|
||
from protzilla.data_preprocessing.plots_helper import generate_log_tics | ||
|
||
from ..constants.colors import ( | ||
PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE, | ||
PROTZILLA_DISCRETE_COLOR_SEQUENCE, | ||
|
@@ -116,6 +119,7 @@ def create_box_plots( | |
y_title="", | ||
x_title="", | ||
group_by: str = "None", | ||
visual_transformation="linear", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please specify the types for the parameters for consistency. |
||
) -> Figure: | ||
""" | ||
A function to create a boxplot for visualisation | ||
|
@@ -204,6 +208,8 @@ def create_box_plots( | |
"yanchor": "top", | ||
}, | ||
) | ||
if visual_transformation == "log10": | ||
fig.update_yaxes(type="log") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is there this inconsistency between "log10" and "log"? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. "log" is the axis type name used by plotly, where log10 is the only possible (intended) base. We decided to use "log10" for our own parameter to make explicit which base is used. |
||
fig.update_yaxes(rangemode="tozero") | ||
return fig | ||
|
||
|
@@ -216,6 +222,8 @@ def create_histograms( | |
heading="", | ||
y_title="", | ||
x_title="", | ||
visual_transformation="linear", | ||
overlay=False, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above |
||
) -> Figure: | ||
""" | ||
A function to create a histogram for visualisation | ||
|
@@ -239,25 +247,71 @@ def create_histograms( | |
:type y_title: str | ||
:param x_title: Optional x axis title for graphs. | ||
:type x_title: str | ||
:param overlay: Specifies whether to draw one Histogram with overlay or two separate histograms | ||
:type overlay: bool | ||
:param visual_transformation: Visual transformation of the y-axis data. | ||
:type visual_transformation: str | ||
:return: returns a pie or bar chart of the data | ||
:rtype: Figure (plotly object) | ||
""" | ||
if visual_transformation not in {"linear", "log10"}: | ||
raise ValueError( | ||
f"""visual_transformation parameter must be "linear" or | ||
"log10" but is {visual_transformation}""" | ||
) | ||
|
||
intensity_name_a = dataframe_a.columns[3] | ||
intensity_name_b = dataframe_b.columns[3] | ||
fig = make_subplots(rows=1, cols=2) | ||
|
||
intensities_a = dataframe_a[intensity_name_a] | ||
intensities_b = dataframe_b[intensity_name_b] | ||
|
||
if visual_transformation == "log10": | ||
intensities_a = intensities_a.apply(np.log10) | ||
intensities_b = intensities_b.apply(np.log10) | ||
|
||
min_value = min(min(intensities_a), min(intensities_b)) | ||
max_value = max(max(intensities_a), max(intensities_b)) | ||
|
||
binsize_factor = 0.0005 if visual_transformation == "linear" else 0.02 | ||
|
||
trace0 = go.Histogram( | ||
x=dataframe_a[intensity_name_a], | ||
x=intensities_a, | ||
marker_color=PROTZILLA_DISCRETE_COLOR_SEQUENCE[0], | ||
name=name_a, | ||
xbins=dict( | ||
start=min_value, | ||
end=max_value, | ||
size=(max_value - min_value) * binsize_factor, | ||
), | ||
) | ||
trace1 = go.Histogram( | ||
x=dataframe_b[intensity_name_b], | ||
x=intensities_b, | ||
marker_color=PROTZILLA_DISCRETE_COLOR_SEQUENCE[1], | ||
name=name_b, | ||
xbins=dict( | ||
start=min_value, | ||
end=max_value, | ||
size=(max_value - min_value) * binsize_factor, | ||
), | ||
) | ||
fig.add_trace(trace0, 1, 1) | ||
fig.add_trace(trace1, 1, 2) | ||
fig.update_layout(bargap=0.2) | ||
if not overlay: | ||
fig = make_subplots(rows=1, cols=2) | ||
fig.add_trace(trace0, 1, 1) | ||
fig.add_trace(trace1, 1, 2) | ||
if visual_transformation == "log10": | ||
fig.update_layout( | ||
xaxis=generate_log_tics(0, max_value), | ||
xaxis2=generate_log_tics(0, max_value), | ||
) | ||
else: | ||
fig = go.Figure() | ||
fig.add_trace(trace0) | ||
fig.add_trace(trace1) | ||
fig.update_layout(barmode="overlay") | ||
fig.update_traces(opacity=0.75) | ||
if visual_transformation == "log10": | ||
fig.update_layout(xaxis=generate_log_tics(0, max_value)) | ||
|
||
fig.update_layout( | ||
xaxis_title=x_title, | ||
|
@@ -276,6 +330,7 @@ def create_histograms( | |
}, | ||
) | ||
fig.update_yaxes(rangemode="tozero") | ||
|
||
return fig | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not consistent with the previous format (e.g. 433-438).