WIP dashboard : echarts, layout, year filter

nimh-dsst · Aug 21, 2024 · 0e7ee58 · 0e7ee58
1 parent a6bf93f
commit 0e7ee58
Show file tree

Hide file tree

Showing 2 changed files with 150 additions and 80 deletions.
diff --git a/web_api/dashboard/app.py b/web_api/dashboard/app.py
@@ -72,7 +72,7 @@ def load_data():
 def dashboard_page():
     template = get_template()
 
-    dashboard = MainDashboard(pn.state.cache["data"])
+    dashboard = MainDashboard({"RTransparent": pn.state.cache["data"]})
 
     template.main.append(dashboard.get_dashboard)
     template.sidebar.append(dashboard.get_sidebar)

diff --git a/web_api/dashboard/main_dashboard.py b/web_api/dashboard/main_dashboard.py
@@ -1,9 +1,8 @@
-import holoviews as hv
 import pandas as pd
 import panel as pn
 import param
 
-pn.extension()
+pn.extension("echarts")
 
 
 pd.options.display.max_columns = None
@@ -17,6 +16,10 @@
 groups = {"year": "int"}
 
 
+# Metric names offered for each dataset, keyed by dataset name.
+# MainDashboard.did_change_dataset copies the list of the selected dataset
+# into the `metrics` parameter.
+datasets_metrics = {
+    "RTransparent": ["is_data_pred", "is_code_pred", "score", "eigenfactor_score"]
+}
+
 dims_aggregations = {
     "is_data_pred": ["percent", "count_true", "count"],
     "is_code_pred": ["percent", "count_true"],
@@ -38,14 +41,20 @@ class MainDashboard(param.Parameterized):
     Main dashboard for the application.
     """
 
-    select_metrics = param.Selector(
-        default="RTransparent", objects=["RTransparent"], label=""
-    )
+    # High-level parameters.
+    dataset = param.Selector(default="", objects=[], label="Dataset")
 
-    grouping_var = param.Selector(
-        default="year", objects=["year", "fund_pmc_institute"], label=""
+    metrics = param.ListSelector(default=[], objects=[], label="Metrics")
+
+    splitting_var = param.Selector(
+        default="year",
+        objects=["year", "fund_pmc_institute"],
+        label="Splitting Variable",
     )
 
+    # Filters
+    filter_pubdate = param.Range(step=1, label="Publication date")
+
     filter_journal = param.Selector(
         default="All journals (including empty)",
         objects=[
@@ -57,10 +66,31 @@ class MainDashboard(param.Parameterized):
     )
     filter_selected_journals = param.ListSelector(default=[], objects=[], label="")
 
-    def __init__(self, raw_data, **params):
+    def __init__(self, datasets, **params):
+        """Store the available datasets and select the first one.
+
+        Args:
+            datasets: Mapping of dataset name to its data (presumably a pandas
+                DataFrame per dataset -- confirm against the caller). The keys
+                become the choices of the ``dataset`` selector.
+            **params: Forwarded to ``param.Parameterized.__init__``.
+        """
         super().__init__(**params)
 
-        self.raw_data = raw_data
+        self.datasets = datasets
+
+        # By default, take the first dataset.
+        # Currently, there's only RTransparent
+        # Assigning `dataset` triggers did_change_dataset (declared with
+        # watch=True), which sets `raw_data` and the dataset-specific values.
+        self.param.dataset.objects = list(self.datasets.keys())
+        self.dataset = self.param.dataset.objects[0]
+
+    @pn.depends("dataset", watch=True)
+    def did_change_dataset(self):
+        self.metrics = datasets_metrics[self.dataset]
+        self.raw_data = self.datasets[self.dataset]
+
+        # Hardcoded for RTransparent for the moment, update to more generic later
+
+        self.param.filter_pubdate.bounds = (
+            self.raw_data.year.min(),
+            self.raw_data.year.max(),
+        )
+        self.param.filter_pubdate.default = (
+            self.raw_data.year.min(),
+            self.raw_data.year.max(),
+        )
 
         self.param.filter_selected_journals.objects = self.raw_data.journal.unique()
         # As default, takes the journals with the biggest number of occurences
@@ -76,30 +106,120 @@ def filtered_grouped_data(self):
         elif self.filter_journal == "Only selected journals":
             filters.append(f"journal in {self.filter_selected_journals}")
 
-        filtered_df = self.raw_data.query(*filters) if filters else self.raw_data
+        if self.filter_pubdate is not None:
+            filters.append(f"year >= {self.filter_pubdate[0]}")
+            filters.append(f"year <= {self.filter_pubdate[1]}")
+
+        filtered_df = (
+            self.raw_data.query(" and ".join(filters)) if filters else self.raw_data
+        )
 
         aggretations = {}
         for field, aggs in dims_aggregations.items():
             for agg in aggs:
                 aggretations[f"{agg}_{field}"] = (field, aggregation_formulas[agg])
 
         result = (
-            filtered_df.groupby(self.grouping_var).agg(**aggretations).reset_index()
+            filtered_df.groupby(self.splitting_var).agg(**aggretations).reset_index()
         )
 
         return result
 
-    @pn.depends("select_metrics", "filter_journal")
+    @pn.depends("dataset", "splitting_var", "filter_pubdate")
+    def get_echart_plot(self):
+        """Build the ECharts line plot of the percentage metrics.
+
+        Reads the aggregated frame returned by ``filtered_grouped_data`` and
+        draws one line per percentage column against the current
+        ``splitting_var``.
+
+        Returns:
+            pn.pane.ECharts: pane rendering the chart (840x640, SVG renderer).
+        """
+        df = self.filtered_grouped_data()
+
+        # The aggregated column names double as series names. The legend must
+        # list these exact names: ECharts matches legend entries to series by
+        # name and does not display entries without a matching series.
+        series_names = ["percent_is_data_pred", "percent_is_code_pred"]
+
+        xAxis = df[self.splitting_var].tolist()
+        series = [
+            {
+                "id": serie,
+                "name": serie,
+                "type": "line",
+                "data": df[serie].tolist(),
+            }
+            for serie in series_names
+        ]
+
+        echarts_config = {
+            "title": {
+                "text": "Percentage of Publications Following Open Science Practices Over Time",
+            },
+            "tooltip": {
+                "show": True,
+                "trigger": "axis",
+                # "formatter": f"""<b>{self.splitting_var}</b> : {{b0}} <br />
+                #                 {{a0}} : {{c0}} <br />
+                #                 {{a1}} : {{c1}} """,
+            },
+            "legend": {
+                "data": series_names,
+                "orient": "vertical",
+                "right": 10,
+                "top": 20,
+                "bottom": 20,
+                "show": True,
+            },
+            "xAxis": {
+                "data": xAxis,
+                "name": self.splitting_var,
+                "nameLocation": "center",
+                "nameGap": 30,
+            },
+            "yAxis": {
+                "name": "percent",
+                "nameLocation": "center",
+                "nameGap": 30,
+            },
+            "series": series,
+        }
+        echarts_pane = pn.pane.ECharts(
+            echarts_config, height=640, width=840, renderer="svg"
+        )
+        return echarts_pane
+
+    # Below are all the functions returning the different parts of the dashboard :
+    # Sidebar, Top Bar and the plot area (in function get_dashboard)
+
+    @pn.depends("filter_pubdate.bounds")
+    def get_pubdate_filter(self):
+        """Build the publication-date filter: two text inputs above a range slider.
+
+        The slider is created from the ``filter_pubdate`` parameter. The text
+        inputs mirror the two ends of the slider and can be edited to move them.
+
+        Returns:
+            pn.Column: a row with the start/end text inputs, then the slider.
+        """
+        lower_bound, upper_bound = (int(b) for b in self.param.filter_pubdate.bounds)
+
+        start_pubdate_input = pn.widgets.TextInput(value=str(lower_bound), width=80)
+        end_pubdate_input = pn.widgets.TextInput(value=str(upper_bound), width=80)
+
+        pubdate_slider = pn.widgets.RangeSlider.from_param(self.param.filter_pubdate)
+
+        def update_pubdate_text_inputs(event):
+            if pubdate_slider.value is None:
+                return
+            # Cast to int like the initial values: a float slider value would
+            # be written as e.g. "2000.0", which int() cannot parse back.
+            start_pubdate_input.value = str(int(pubdate_slider.value[0]))
+            end_pubdate_input.value = str(int(pubdate_slider.value[1]))
+
+        pubdate_slider.param.watch(update_pubdate_text_inputs, "value")
+
+        def parse_year(text, fallback):
+            # Empty text falls back to the bound; unparsable text yields None.
+            text = (text or "").strip()
+            if not text:
+                return fallback
+            try:
+                return int(float(text))
+            except (ValueError, OverflowError):
+                return None
+
+        def clamp(year):
+            return min(max(year, lower_bound), upper_bound)
+
+        def update_pubdate_slider(event):
+            start = parse_year(start_pubdate_input.value, lower_bound)
+            end = parse_year(end_pubdate_input.value, upper_bound)
+            if start is None or end is None:
+                # Ignore text that is not a number instead of raising in the
+                # watcher callback.
+                return
+            # Keep both ends inside the parameter bounds, since a bounded
+            # param.Range rejects values outside them.
+            pubdate_slider.value = (clamp(start), clamp(end))
+
+        start_pubdate_input.param.watch(update_pubdate_slider, "value")
+        end_pubdate_input.param.watch(update_pubdate_slider, "value")
+
+        return pn.Column(pn.Row(start_pubdate_input, end_pubdate_input), pubdate_slider)
+
+    @pn.depends("dataset", "filter_journal")
     def get_sidebar(self):
         items = [
             pn.pane.Markdown("## Filters"),
-            pn.pane.Markdown("### Metrics extraction tool"),
-            pn.widgets.Select.from_param(self.param.select_metrics),
+            pn.pane.Markdown("### Applied Filters"),
+            pn.pane.Markdown("(todo)"),
             pn.layout.Divider(),
-            pn.pane.Markdown("### Grouping"),
-            pn.widgets.Select.from_param(self.param.grouping_var),
+            pn.pane.Markdown("### Publication Details"),
+            # pn.pane.Markdown("#### Publication Date"),
+            self.get_pubdate_filter,
             pn.layout.Divider(),
-            pn.pane.Markdown("### Filters"),
             pn.widgets.Select.from_param(self.param.filter_journal),
         ]
 
@@ -114,74 +234,24 @@ def get_sidebar(self):
 
         return sidebar
 
+    @pn.depends("dataset")
+    def get_top_bar(self):
+        """Return the top bar: dataset, metrics and splitting-variable widgets in a row."""
+        dataset_select = pn.widgets.Select.from_param(self.param.dataset)
+        metrics_checkboxes = pn.widgets.CheckBoxGroup.from_param(self.param.metrics)
+        splitting_select = pn.widgets.Select.from_param(self.param.splitting_var)
+        return pn.Row(dataset_select, metrics_checkboxes, splitting_select)
+
     @pn.depends(
-        "select_metrics", "filter_journal", "filter_selected_journals", "grouping_var"
+        "dataset", "filter_journal", "filter_selected_journals", "splitting_var"
     )
     def get_dashboard(self):
+        """Assemble the main area of the dashboard.
+
+        Returns:
+            pn.Column: a title followed by a column holding the top bar and
+            the ECharts plot. ``get_top_bar`` and ``get_echart_plot`` are
+            passed as bound methods, not called here.
+        """
-        df = self.filtered_grouped_data()
-
-        # Create charts
-        fig_data_curve = hv.Curve(
-            df,
-            kdims=[self.grouping_var],
-            vdims=[
-                "percent_is_data_pred",
-            ],
-        ).opts(color="red")
-
-        fig_code_curve = hv.Curve(
-            df,
-            kdims=[self.grouping_var],
-            vdims=[
-                "percent_is_code_pred",
-            ],
-        ).opts(color="lightblue")
-
-        fig_data_points = hv.Points(
-            df,
-            kdims=[self.grouping_var, "percent_is_data_pred"],
-        ).opts(
-            tools=["hover"],
-            color="red",
-            size=5,
-            hover_tooltips=[
-                (self.grouping_var, f"@{self.grouping_var}"),
-                ("% is_data_prep", "@percent_is_data_pred"),
-                ("Total is_data_prep", "@count_true_is_data_pred"),
-                ("nbr_publications", "@count_is_data_pred"),
-            ],
-        )
-
-        fig_code_points = hv.Points(
-            df,
-            kdims=[self.grouping_var, "percent_is_code_pred"],
-        ).opts(
-            tools=["hover"],
-            color="lightblue",
-            size=5,
-            hover_tooltips=[
-                (self.grouping_var, f"@{self.grouping_var}"),
-                ("% is_code_prep", "@percent_is_code_pred"),
-                ("Total is_code_prep", "@count_true_is_code_pred"),
-                ("nbr_publications", "@count_is_data_pred"),
-            ],
-        )
-
-        plot = (
-            fig_code_curve * fig_data_curve * fig_data_points * fig_code_points
-        ).opts(
-            title="",
-            xlabel=self.grouping_var,
-            ylabel="Percentage",
-            width=800,
-            height=400,
-            legend_position="top_left",
-        )
-
         # Layout the dashboard
         dashboard = pn.Column(
             "# Data and code transparency",
-            pn.Column(plot, sizing_mode="stretch_width"),
+            pn.Column(
+                self.get_top_bar, self.get_echart_plot, sizing_mode="stretch_width"
+            ),
         )
 
         return dashboard