diff --git a/web_api/dashboard/app.py b/web_api/dashboard/app.py
index 1e2e7354..db29529f 100644
--- a/web_api/dashboard/app.py
+++ b/web_api/dashboard/app.py
@@ -72,7 +72,7 @@ def load_data():
def dashboard_page():
template = get_template()
- dashboard = MainDashboard(pn.state.cache["data"])
+ dashboard = MainDashboard({"RTransparent": pn.state.cache["data"]})
template.main.append(dashboard.get_dashboard)
template.sidebar.append(dashboard.get_sidebar)
diff --git a/web_api/dashboard/main_dashboard.py b/web_api/dashboard/main_dashboard.py
index a5044088..a2f41ee8 100644
--- a/web_api/dashboard/main_dashboard.py
+++ b/web_api/dashboard/main_dashboard.py
@@ -1,9 +1,8 @@
-import holoviews as hv
import pandas as pd
import panel as pn
import param
-pn.extension()
+pn.extension("echarts")
pd.options.display.max_columns = None
@@ -17,6 +16,10 @@
groups = {"year": "int"}
+datasets_metrics = {
+ "RTransparent": ["is_data_pred", "is_code_pred", "score", "eigenfactor_score"]
+}
+
dims_aggregations = {
"is_data_pred": ["percent", "count_true", "count"],
"is_code_pred": ["percent", "count_true"],
@@ -38,14 +41,20 @@ class MainDashboard(param.Parameterized):
Main dashboard for the application.
"""
- select_metrics = param.Selector(
- default="RTransparent", objects=["RTransparent"], label=""
- )
+ # High-level parameters.
+ dataset = param.Selector(default="", objects=[], label="Dataset")
- grouping_var = param.Selector(
- default="year", objects=["year", "fund_pmc_institute"], label=""
+ metrics = param.ListSelector(default=[], objects=[], label="Metrics")
+
+ splitting_var = param.Selector(
+ default="year",
+ objects=["year", "fund_pmc_institute"],
+ label="Splitting Variable",
)
+ # Filters
+ filter_pubdate = param.Range(step=1, label="Publication date")
+
filter_journal = param.Selector(
default="All journals (including empty)",
objects=[
@@ -57,10 +66,31 @@ class MainDashboard(param.Parameterized):
)
filter_selected_journals = param.ListSelector(default=[], objects=[], label="")
- def __init__(self, raw_data, **params):
+ def __init__(self, datasets, **params):
+ """Set up the dashboard from the available datasets.
+
+ Args:
+ datasets: mapping of dataset name to its data; it is indexed with
+ ``self.datasets[self.dataset]`` whenever the selected dataset changes
+ (presumably pandas DataFrames -- confirm against the caller).
+ **params: passed through to ``super().__init__``.
+ """
super().__init__(**params)
- self.raw_data = raw_data
+ self.datasets = datasets
+
+ # By default, take the first dataset.
+ # Currently, there's only RTransparent
+ self.param.dataset.objects = list(self.datasets.keys())
+ # A change of ``dataset`` triggers did_change_dataset (declared with watch=True).
+ self.dataset = self.param.dataset.objects[0]
+
+ @pn.depends("dataset", watch=True)
+ def did_change_dataset(self):
+ self.metrics = datasets_metrics[self.dataset]
+ self.raw_data = self.datasets[self.dataset]
+
+ # Hardcoded for RTransparent for the moment, update to more generic later
+
+ self.param.filter_pubdate.bounds = (
+ self.raw_data.year.min(),
+ self.raw_data.year.max(),
+ )
+ self.param.filter_pubdate.default = (
+ self.raw_data.year.min(),
+ self.raw_data.year.max(),
+ )
self.param.filter_selected_journals.objects = self.raw_data.journal.unique()
# As default, takes the journals with the biggest number of occurences
@@ -76,7 +106,13 @@ def filtered_grouped_data(self):
elif self.filter_journal == "Only selected journals":
filters.append(f"journal in {self.filter_selected_journals}")
- filtered_df = self.raw_data.query(*filters) if filters else self.raw_data
+ if self.filter_pubdate is not None:
+ filters.append(f"year >= {self.filter_pubdate[0]}")
+ filters.append(f"year <= {self.filter_pubdate[1]}")
+
+ filtered_df = (
+ self.raw_data.query(" and ".join(filters)) if filters else self.raw_data
+ )
aggretations = {}
for field, aggs in dims_aggregations.items():
@@ -84,22 +120,106 @@ def filtered_grouped_data(self):
aggretations[f"{agg}_{field}"] = (field, aggregation_formulas[agg])
result = (
- filtered_df.groupby(self.grouping_var).agg(**aggretations).reset_index()
+ filtered_df.groupby(self.splitting_var).agg(**aggretations).reset_index()
)
return result
- @pn.depends("select_metrics", "filter_journal")
+    @pn.depends("dataset", "splitting_var", "filter_pubdate")
+    def get_echart_plot(self):
+        """Build the ECharts line plot of the percentage metrics.
+
+        Takes the aggregated frame returned by ``filtered_grouped_data`` and
+        draws one line per percentage column, using the values of the current
+        ``splitting_var`` column as the x-axis categories.
+
+        Returns:
+            pn.pane.ECharts: the pane holding the configured chart.
+        """
+        df = self.filtered_grouped_data()
+
+        x_values = df[self.splitting_var].tolist()
+        series = [
+            {
+                "id": column,
+                "name": column,
+                "type": "line",
+                "data": df[column].tolist(),
+            }
+            for column in ("percent_is_data_pred", "percent_is_code_pred")
+        ]
+
+        echarts_config = {
+            "title": {
+                "text": "Percentage of Publications Following Open Science Practices Over Time",
+            },
+            "tooltip": {
+                "show": True,
+                "trigger": "axis",
+            },
+            "legend": {
+                # Legend entries are matched against series names, so derive
+                # them from the series instead of repeating (and mistyping)
+                # the names by hand.
+                "data": [serie["name"] for serie in series],
+                "orient": "vertical",
+                "right": 10,
+                "top": 20,
+                "bottom": 20,
+                "show": True,
+            },
+            "xAxis": {
+                "data": x_values,
+                "name": self.splitting_var,
+                "nameLocation": "center",
+                "nameGap": 30,
+            },
+            "yAxis": {
+                "name": "percent",
+                "nameLocation": "center",
+                "nameGap": 30,
+            },
+            "series": series,
+        }
+        return pn.pane.ECharts(echarts_config, height=640, width=840, renderer="svg")
+
+ # The methods below build the individual parts of the dashboard:
+ # the sidebar, the top bar, and the plot area (in get_dashboard).
+
+    @pn.depends("filter_pubdate.bounds")
+    def get_pubdate_filter(self):
+        """Build the publication-date filter: two text inputs above a range slider.
+
+        The slider is created from the ``filter_pubdate`` parameter. The text
+        inputs mirror the two ends of the slider and can be edited to move it.
+
+        Returns:
+            pn.Column: a row with the start/end text inputs, then the slider.
+        """
+        start_pubdate_input = pn.widgets.TextInput(
+            value=str(int(self.param.filter_pubdate.bounds[0])), width=80
+        )
+        end_pubdate_input = pn.widgets.TextInput(
+            value=str(int(self.param.filter_pubdate.bounds[1])), width=80
+        )
+
+        pubdate_slider = pn.widgets.RangeSlider.from_param(self.param.filter_pubdate)
+
+        def update_pubdate_text_inputs(event):
+            # Cast to int, like the initial values above, so the text stays
+            # parseable by int() even if the slider reports floats
+            # ("2000" rather than "2000.0").
+            start_pubdate_input.value = str(int(pubdate_slider.value[0]))
+            end_pubdate_input.value = str(int(pubdate_slider.value[1]))
+
+        pubdate_slider.param.watch(update_pubdate_text_inputs, "value")
+
+        def update_pubdate_slider(event):
+            lower_bound, upper_bound = self.param.filter_pubdate.bounds
+            try:
+                start = int(start_pubdate_input.value or lower_bound)
+                end = int(end_pubdate_input.value or upper_bound)
+            except ValueError:
+                # Non-numeric text: leave the slider untouched instead of
+                # raising from inside the widget callback.
+                return
+
+            # Keep the typed years inside the parameter bounds and ordered,
+            # since the slider is linked to a bounded Range parameter.
+            start = int(min(max(start, lower_bound), upper_bound))
+            end = int(min(max(end, lower_bound), upper_bound))
+            if start > end:
+                return
+
+            pubdate_slider.value = (start, end)
+
+        start_pubdate_input.param.watch(update_pubdate_slider, "value")
+        end_pubdate_input.param.watch(update_pubdate_slider, "value")
+
+        return pn.Column(pn.Row(start_pubdate_input, end_pubdate_input), pubdate_slider)
+
+ @pn.depends("dataset", "filter_journal")
def get_sidebar(self):
items = [
pn.pane.Markdown("## Filters"),
- pn.pane.Markdown("### Metrics extraction tool"),
- pn.widgets.Select.from_param(self.param.select_metrics),
+ pn.pane.Markdown("### Applied Filters"),
+ pn.pane.Markdown("(todo)"),
pn.layout.Divider(),
- pn.pane.Markdown("### Grouping"),
- pn.widgets.Select.from_param(self.param.grouping_var),
+ pn.pane.Markdown("### Publication Details"),
+ # pn.pane.Markdown("#### Publication Date"),
+ self.get_pubdate_filter,
pn.layout.Divider(),
- pn.pane.Markdown("### Filters"),
pn.widgets.Select.from_param(self.param.filter_journal),
]
@@ -114,74 +234,24 @@ def get_sidebar(self):
return sidebar
+    @pn.depends("dataset")
+    def get_top_bar(self):
+        """Return the row of high-level controls: dataset, metrics, splitting variable."""
+        dataset_select = pn.widgets.Select.from_param(self.param.dataset)
+        metrics_checkboxes = pn.widgets.CheckBoxGroup.from_param(self.param.metrics)
+        splitting_select = pn.widgets.Select.from_param(self.param.splitting_var)
+        return pn.Row(dataset_select, metrics_checkboxes, splitting_select)
+
@pn.depends(
- "select_metrics", "filter_journal", "filter_selected_journals", "grouping_var"
+ "dataset", "filter_journal", "filter_selected_journals", "splitting_var"
)
def get_dashboard(self):
+ """Lay out the main area: a title above the top bar and the ECharts plot."""
- df = self.filtered_grouped_data()
-
- # Create charts
- fig_data_curve = hv.Curve(
- df,
- kdims=[self.grouping_var],
- vdims=[
- "percent_is_data_pred",
- ],
- ).opts(color="red")
-
- fig_code_curve = hv.Curve(
- df,
- kdims=[self.grouping_var],
- vdims=[
- "percent_is_code_pred",
- ],
- ).opts(color="lightblue")
-
- fig_data_points = hv.Points(
- df,
- kdims=[self.grouping_var, "percent_is_data_pred"],
- ).opts(
- tools=["hover"],
- color="red",
- size=5,
- hover_tooltips=[
- (self.grouping_var, f"@{self.grouping_var}"),
- ("% is_data_prep", "@percent_is_data_pred"),
- ("Total is_data_prep", "@count_true_is_data_pred"),
- ("nbr_publications", "@count_is_data_pred"),
- ],
- )
-
- fig_code_points = hv.Points(
- df,
- kdims=[self.grouping_var, "percent_is_code_pred"],
- ).opts(
- tools=["hover"],
- color="lightblue",
- size=5,
- hover_tooltips=[
- (self.grouping_var, f"@{self.grouping_var}"),
- ("% is_code_prep", "@percent_is_code_pred"),
- ("Total is_code_prep", "@count_true_is_code_pred"),
- ("nbr_publications", "@count_is_data_pred"),
- ],
- )
-
- plot = (
- fig_code_curve * fig_data_curve * fig_data_points * fig_code_points
- ).opts(
- title="",
- xlabel=self.grouping_var,
- ylabel="Percentage",
- width=800,
- height=400,
- legend_position="top_left",
- )
-
# Layout the dashboard
dashboard = pn.Column(
"# Data and code transparency",
- pn.Column(plot, sizing_mode="stretch_width"),
+ pn.Column(
+ # The bound methods are passed uncalled (not their results).
+ self.get_top_bar, self.get_echart_plot, sizing_mode="stretch_width"
+ ),
)
return dashboard