Skip to content

Commit

Permalink
WIP dashboard : echarts, layout, year filter
Browse files Browse the repository at this point in the history
  • Loading branch information
pierrotsmnrd committed Aug 21, 2024
1 parent a6bf93f commit 0e7ee58
Show file tree
Hide file tree
Showing 2 changed files with 150 additions and 80 deletions.
2 changes: 1 addition & 1 deletion web_api/dashboard/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def load_data():
def dashboard_page():
template = get_template()

dashboard = MainDashboard(pn.state.cache["data"])
dashboard = MainDashboard({"RTransparent": pn.state.cache["data"]})

template.main.append(dashboard.get_dashboard)
template.sidebar.append(dashboard.get_sidebar)
Expand Down
228 changes: 149 additions & 79 deletions web_api/dashboard/main_dashboard.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import holoviews as hv
import pandas as pd
import panel as pn
import param

pn.extension()
pn.extension("echarts")


pd.options.display.max_columns = None
Expand All @@ -17,6 +16,10 @@
groups = {"year": "int"}


datasets_metrics = {
"RTransparent": ["is_data_pred", "is_code_pred", "score", "eigenfactor_score"]
}

dims_aggregations = {
"is_data_pred": ["percent", "count_true", "count"],
"is_code_pred": ["percent", "count_true"],
Expand All @@ -38,14 +41,20 @@ class MainDashboard(param.Parameterized):
Main dashboard for the application.
"""

select_metrics = param.Selector(
default="RTransparent", objects=["RTransparent"], label=""
)
# High-level parameters.
dataset = param.Selector(default="", objects=[], label="Dataset")

grouping_var = param.Selector(
default="year", objects=["year", "fund_pmc_institute"], label=""
metrics = param.ListSelector(default=[], objects=[], label="Metrics")

splitting_var = param.Selector(
default="year",
objects=["year", "fund_pmc_institute"],
label="Splitting Variable",
)

# Filters
filter_pubdate = param.Range(step=1, label="Publication date")

filter_journal = param.Selector(
default="All journals (including empty)",
objects=[
Expand All @@ -57,10 +66,31 @@ class MainDashboard(param.Parameterized):
)
filter_selected_journals = param.ListSelector(default=[], objects=[], label="")

def __init__(self, raw_data, **params):
def __init__(self, datasets, **params):
super().__init__(**params)

self.raw_data = raw_data
self.datasets = datasets

# By default, take the first dataset.
# Currently, there's only RTransparent
self.param.dataset.objects = list(self.datasets.keys())
self.dataset = self.param.dataset.objects[0]

@pn.depends("dataset", watch=True)
def did_change_dataset(self):
self.metrics = datasets_metrics[self.dataset]
self.raw_data = self.datasets[self.dataset]

# Hardcoded for RTransparent for the moment, update to more generic later

self.param.filter_pubdate.bounds = (
self.raw_data.year.min(),
self.raw_data.year.max(),
)
self.param.filter_pubdate.default = (
self.raw_data.year.min(),
self.raw_data.year.max(),
)

self.param.filter_selected_journals.objects = self.raw_data.journal.unique()
# By default, take the journals with the largest number of occurrences
Expand All @@ -76,30 +106,120 @@ def filtered_grouped_data(self):
elif self.filter_journal == "Only selected journals":
filters.append(f"journal in {self.filter_selected_journals}")

filtered_df = self.raw_data.query(*filters) if filters else self.raw_data
if self.filter_pubdate is not None:
filters.append(f"year >= {self.filter_pubdate[0]}")
filters.append(f"year <= {self.filter_pubdate[1]}")

filtered_df = (
self.raw_data.query(" and ".join(filters)) if filters else self.raw_data
)

aggretations = {}
for field, aggs in dims_aggregations.items():
for agg in aggs:
aggretations[f"{agg}_{field}"] = (field, aggregation_formulas[agg])

result = (
filtered_df.groupby(self.grouping_var).agg(**aggretations).reset_index()
filtered_df.groupby(self.splitting_var).agg(**aggretations).reset_index()
)

return result

@pn.depends("select_metrics", "filter_journal")
@pn.depends("dataset", "splitting_var", "filter_pubdate")
def get_echart_plot(self):
    """Build the ECharts line chart of the percentage metrics.

    The aggregated frame returned by ``self.filtered_grouped_data()`` is
    plotted as one line per percentage column, with the values of the
    current splitting variable on the x axis.

    Returns:
        A ``pn.pane.ECharts`` pane wrapping the chart configuration.
    """
    df = self.filtered_grouped_data()

    # Single source of truth for the plotted columns: ECharts matches legend
    # entries to series by name, so both must be built from the same list.
    series_names = ["percent_is_data_pred", "percent_is_code_pred"]

    x_values = df[self.splitting_var].tolist()
    series = [
        {
            "id": name,
            "name": name,
            "type": "line",
            "data": df[name].tolist(),
        }
        for name in series_names
    ]

    echarts_config = {
        "title": {
            "text": "Percentage of Publications Following Open Science Practices Over Time",
        },
        # Default axis-triggered tooltip; no custom "formatter" is set.
        "tooltip": {
            "show": True,
            "trigger": "axis",
        },
        "legend": {
            "data": series_names,
            "orient": "vertical",
            "right": 10,
            "top": 20,
            "bottom": 20,
            "show": True,
        },
        "xAxis": {
            "data": x_values,
            "name": self.splitting_var,
            "nameLocation": "center",
            "nameGap": 30,
        },
        "yAxis": {
            "name": "percent",
            "nameLocation": "center",
            "nameGap": 30,
        },
        "series": series,
    }

    return pn.pane.ECharts(echarts_config, height=640, width=840, renderer="svg")

# Below are all the functions returning the different parts of the dashboard :
# Sidebar, Top Bar and the plot area (in function get_dashboard)

@pn.depends("filter_pubdate.bounds")
def get_pubdate_filter(self):
    """Build the publication-date filter: two text boxes plus a range slider.

    The slider is bound to the ``filter_pubdate`` parameter. The two text
    inputs mirror the slider ends and can be edited to move them. Typed
    values are parsed leniently: empty or unparsable text falls back to the
    matching bound, values are clamped to the bounds, and a reversed pair is
    reordered.

    Returns:
        A ``pn.Column`` holding the row of text inputs above the slider.
    """
    lower_bound, upper_bound = (int(b) for b in self.param.filter_pubdate.bounds)

    start_pubdate_input = pn.widgets.TextInput(value=str(lower_bound), width=80)
    end_pubdate_input = pn.widgets.TextInput(value=str(upper_bound), width=80)

    pubdate_slider = pn.widgets.RangeSlider.from_param(self.param.filter_pubdate)

    # Set while the text boxes are refreshed from the slider, so the text
    # watchers do not push a half-updated (new start, stale end) pair back
    # into the slider.
    sync_state = {"updating_text": False}

    def parse_year(text, fallback):
        """Parse a typed year, clamped to the bounds; fallback if invalid."""
        try:
            # Go through float so that text such as "2005.0" is accepted.
            year = int(float(text))
        except (TypeError, ValueError, OverflowError):
            return fallback
        return min(max(year, lower_bound), upper_bound)

    def show_range(value):
        """Write a (start, end) pair into the two text boxes as integers."""
        if value is None:
            return
        sync_state["updating_text"] = True
        try:
            start_pubdate_input.value = str(int(value[0]))
            end_pubdate_input.value = str(int(value[1]))
        finally:
            sync_state["updating_text"] = False

    def update_pubdate_text_inputs(event):
        show_range(event.new)

    pubdate_slider.param.watch(update_pubdate_text_inputs, "value")

    def update_pubdate_slider(event):
        if sync_state["updating_text"]:
            return
        start = parse_year(start_pubdate_input.value, lower_bound)
        end = parse_year(end_pubdate_input.value, upper_bound)
        pubdate_slider.value = (min(start, end), max(start, end))
        # Replace whatever was typed with the normalised value actually used.
        show_range(pubdate_slider.value)

    start_pubdate_input.param.watch(update_pubdate_slider, "value")
    end_pubdate_input.param.watch(update_pubdate_slider, "value")

    return pn.Column(pn.Row(start_pubdate_input, end_pubdate_input), pubdate_slider)

@pn.depends("dataset", "filter_journal")
def get_sidebar(self):
items = [
pn.pane.Markdown("## Filters"),
pn.pane.Markdown("### Metrics extraction tool"),
pn.widgets.Select.from_param(self.param.select_metrics),
pn.pane.Markdown("### Applied Filters"),
pn.pane.Markdown("(todo)"),
pn.layout.Divider(),
pn.pane.Markdown("### Grouping"),
pn.widgets.Select.from_param(self.param.grouping_var),
pn.pane.Markdown("### Publication Details"),
# pn.pane.Markdown("#### Publication Date"),
self.get_pubdate_filter,
pn.layout.Divider(),
pn.pane.Markdown("### Filters"),
pn.widgets.Select.from_param(self.param.filter_journal),
]

Expand All @@ -114,74 +234,24 @@ def get_sidebar(self):

return sidebar

@pn.depends("dataset")
def get_top_bar(self):
    """Return the row of high-level controls.

    The row holds, in order, a dataset selector, a metrics checkbox group
    and a splitting-variable selector, each bound to the matching parameter.
    """
    dataset_select = pn.widgets.Select.from_param(self.param.dataset)
    metrics_checkboxes = pn.widgets.CheckBoxGroup.from_param(self.param.metrics)
    splitting_select = pn.widgets.Select.from_param(self.param.splitting_var)

    return pn.Row(dataset_select, metrics_checkboxes, splitting_select)

@pn.depends(
"select_metrics", "filter_journal", "filter_selected_journals", "grouping_var"
"dataset", "filter_journal", "filter_selected_journals", "splitting_var"
)
def get_dashboard(self):
df = self.filtered_grouped_data()

# Create charts
fig_data_curve = hv.Curve(
df,
kdims=[self.grouping_var],
vdims=[
"percent_is_data_pred",
],
).opts(color="red")

fig_code_curve = hv.Curve(
df,
kdims=[self.grouping_var],
vdims=[
"percent_is_code_pred",
],
).opts(color="lightblue")

fig_data_points = hv.Points(
df,
kdims=[self.grouping_var, "percent_is_data_pred"],
).opts(
tools=["hover"],
color="red",
size=5,
hover_tooltips=[
(self.grouping_var, f"@{self.grouping_var}"),
("% is_data_prep", "@percent_is_data_pred"),
("Total is_data_prep", "@count_true_is_data_pred"),
("nbr_publications", "@count_is_data_pred"),
],
)

fig_code_points = hv.Points(
df,
kdims=[self.grouping_var, "percent_is_code_pred"],
).opts(
tools=["hover"],
color="lightblue",
size=5,
hover_tooltips=[
(self.grouping_var, f"@{self.grouping_var}"),
("% is_code_prep", "@percent_is_code_pred"),
("Total is_code_prep", "@count_true_is_code_pred"),
("nbr_publications", "@count_is_data_pred"),
],
)

plot = (
fig_code_curve * fig_data_curve * fig_data_points * fig_code_points
).opts(
title="",
xlabel=self.grouping_var,
ylabel="Percentage",
width=800,
height=400,
legend_position="top_left",
)

# Layout the dashboard
dashboard = pn.Column(
"# Data and code transparency",
pn.Column(plot, sizing_mode="stretch_width"),
pn.Column(
self.get_top_bar, self.get_echart_plot, sizing_mode="stretch_width"
),
)

return dashboard

0 comments on commit 0e7ee58

Please sign in to comment.