From 0e7ee58cb6bf407eedf0e0d08842d6a762149f86 Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Simonard Date: Wed, 21 Aug 2024 09:31:58 +0200 Subject: [PATCH] WIP dashboard : echarts, layout, year filter --- web_api/dashboard/app.py | 2 +- web_api/dashboard/main_dashboard.py | 228 ++++++++++++++++++---------- 2 files changed, 150 insertions(+), 80 deletions(-) diff --git a/web_api/dashboard/app.py b/web_api/dashboard/app.py index 1e2e7354..db29529f 100644 --- a/web_api/dashboard/app.py +++ b/web_api/dashboard/app.py @@ -72,7 +72,7 @@ def load_data(): def dashboard_page(): template = get_template() - dashboard = MainDashboard(pn.state.cache["data"]) + dashboard = MainDashboard({"RTransparent": pn.state.cache["data"]}) template.main.append(dashboard.get_dashboard) template.sidebar.append(dashboard.get_sidebar) diff --git a/web_api/dashboard/main_dashboard.py b/web_api/dashboard/main_dashboard.py index a5044088..a2f41ee8 100644 --- a/web_api/dashboard/main_dashboard.py +++ b/web_api/dashboard/main_dashboard.py @@ -1,9 +1,8 @@ -import holoviews as hv import pandas as pd import panel as pn import param -pn.extension() +pn.extension("echarts") pd.options.display.max_columns = None @@ -17,6 +16,10 @@ groups = {"year": "int"} +datasets_metrics = { + "RTransparent": ["is_data_pred", "is_code_pred", "score", "eigenfactor_score"] +} + dims_aggregations = { "is_data_pred": ["percent", "count_true", "count"], "is_code_pred": ["percent", "count_true"], @@ -38,14 +41,20 @@ class MainDashboard(param.Parameterized): Main dashboard for the application. """ - select_metrics = param.Selector( - default="RTransparent", objects=["RTransparent"], label="" - ) + # High-level parameters. + dataset = param.Selector(default="", objects=[], label="Dataset") - grouping_var = param.Selector( - default="year", objects=["year", "fund_pmc_institute"], label="" + metrics = param.ListSelector(default=[], objects=[], label="Metrics") + + splitting_var = param.Selector( + default="year", + objects=["year", "fund_pmc_institute"], + label="Splitting Variable", ) + # Filters + filter_pubdate = param.Range(step=1, label="Publication date") + filter_journal = param.Selector( default="All journals (including empty)", objects=[ @@ -57,10 +66,31 @@ class MainDashboard(param.Parameterized): ) filter_selected_journals = param.ListSelector(default=[], objects=[], label="") - def __init__(self, raw_data, **params): + def __init__(self, datasets, **params): super().__init__(**params) - self.raw_data = raw_data + self.datasets = datasets + + # By default, take the first dataset. + # Currently, there's only RTransparent + self.param.dataset.objects = list(self.datasets.keys()) + self.dataset = self.param.dataset.objects[0] + + @pn.depends("dataset", watch=True) + def did_change_dataset(self): + self.metrics = datasets_metrics[self.dataset] + self.raw_data = self.datasets[self.dataset] + + # Hardcoded for RTransparent for the moment, update to more generic later + + self.param.filter_pubdate.bounds = ( + self.raw_data.year.min(), + self.raw_data.year.max(), + ) + self.param.filter_pubdate.default = ( + self.raw_data.year.min(), + self.raw_data.year.max(), + ) self.param.filter_selected_journals.objects = self.raw_data.journal.unique() # As default, takes the journals with the biggest number of occurences @@ -76,7 +106,13 @@ def filtered_grouped_data(self): elif self.filter_journal == "Only selected journals": filters.append(f"journal in {self.filter_selected_journals}") - filtered_df = self.raw_data.query(*filters) if filters else self.raw_data + if self.filter_pubdate is not None: + filters.append(f"year >= {self.filter_pubdate[0]}") + filters.append(f"year <= {self.filter_pubdate[1]}") + + filtered_df = ( + self.raw_data.query(" and ".join(filters)) if filters else self.raw_data + ) aggretations = {} for field, aggs in dims_aggregations.items(): @@ -84,22 +120,106 @@ def filtered_grouped_data(self): aggretations[f"{agg}_{field}"] = (field, aggregation_formulas[agg]) result = ( - filtered_df.groupby(self.grouping_var).agg(**aggretations).reset_index() + filtered_df.groupby(self.splitting_var).agg(**aggretations).reset_index() ) return result - @pn.depends("select_metrics", "filter_journal") + @pn.depends("dataset", "splitting_var", "filter_pubdate") + def get_echart_plot(self): + df = self.filtered_grouped_data() + + xAxis = df[self.splitting_var].tolist() + series = [ + { + "id": serie, + "name": serie, + "type": "line", + "data": df[serie].tolist(), + } + for serie in ["percent_is_data_pred", "percent_is_code_pred"] + ] + + echarts_config = { + "title": { + "text": "Percentage of Publications Following Open Science Practices Over Time", + }, + "tooltip": { + "show": True, + "trigger": "axis", + # "formatter": f"""{self.splitting_var} : {{b0}}
+ # {{a0}} : {{c0}}
+ # {{a1}} : {{c1}} """, + }, + "legend": { + #'data':['Sales'] + "data": ["is_data_pred", "is_code_pred"], + "orient": "vertical", + "right": 10, + "top": 20, + "bottom": 20, + "show": True, + }, + "xAxis": { + "data": xAxis, + "name": self.splitting_var, + "nameLocation": "center", + "nameGap": 30, + }, + "yAxis": { + "name": "percent", + "nameLocation": "center", + "nameGap": 30, + }, + "series": series, + } + echarts_pane = pn.pane.ECharts( + echarts_config, height=640, width=840, renderer="svg" + ) + return echarts_pane + + # Below are all the functions returning the different parts of the dashboard : + # Sidebar, Top Bar and the plot area (in function get_dashboard) + + @pn.depends("filter_pubdate.bounds") + def get_pubdate_filter(self): + start_pubdate_input = pn.widgets.TextInput( + value=str(int(self.param.filter_pubdate.bounds[0])), width=80 + ) + end_pubdate_input = pn.widgets.TextInput( + value=str(int(self.param.filter_pubdate.bounds[1])), width=80 + ) + + pubdate_slider = pn.widgets.RangeSlider.from_param(self.param.filter_pubdate) + + def update_pubdate_text_inputs(event): + start_pubdate_input.value = str(pubdate_slider.value[0]) + end_pubdate_input.value = str(pubdate_slider.value[1]) + + pubdate_slider.param.watch(update_pubdate_text_inputs, "value") + + def update_pubdate_slider(event): + pubdate_slider.value = ( + int(start_pubdate_input.value or self.param.filter_pubdate.bounds[0]), + int(end_pubdate_input.value or self.param.filter_pubdate.bounds[1]), + ) + + start_pubdate_input.param.watch(update_pubdate_slider, "value") + end_pubdate_input.param.watch(update_pubdate_slider, "value") + + return pn.Column(pn.Row(start_pubdate_input, end_pubdate_input), pubdate_slider) + + @pn.depends("dataset", "filter_journal") def get_sidebar(self): items = [ pn.pane.Markdown("## Filters"), - pn.pane.Markdown("### Metrics extraction tool"), - pn.widgets.Select.from_param(self.param.select_metrics), + pn.pane.Markdown("### Applied Filters"), + pn.pane.Markdown("(todo)"), pn.layout.Divider(), - pn.pane.Markdown("### Grouping"), - pn.widgets.Select.from_param(self.param.grouping_var), + pn.pane.Markdown("### Publication Details"), + # pn.pane.Markdown("#### Publication Date"), + self.get_pubdate_filter, pn.layout.Divider(), - pn.pane.Markdown("### Filters"), pn.widgets.Select.from_param(self.param.filter_journal), ] @@ -114,74 +234,24 @@ def get_sidebar(self): return sidebar + @pn.depends("dataset") + def get_top_bar(self): + return pn.Row( + pn.widgets.Select.from_param(self.param.dataset), + pn.widgets.CheckBoxGroup.from_param(self.param.metrics), + pn.widgets.Select.from_param(self.param.splitting_var), + ) + @pn.depends( - "select_metrics", "filter_journal", "filter_selected_journals", "grouping_var" + "dataset", "filter_journal", "filter_selected_journals", "splitting_var" ) def get_dashboard(self): - df = self.filtered_grouped_data() - - # Create charts - fig_data_curve = hv.Curve( - df, - kdims=[self.grouping_var], - vdims=[ - "percent_is_data_pred", - ], - ).opts(color="red") - - fig_code_curve = hv.Curve( - df, - kdims=[self.grouping_var], - vdims=[ - "percent_is_code_pred", - ], - ).opts(color="lightblue") - - fig_data_points = hv.Points( - df, - kdims=[self.grouping_var, "percent_is_data_pred"], - ).opts( - tools=["hover"], - color="red", - size=5, - hover_tooltips=[ - (self.grouping_var, f"@{self.grouping_var}"), - ("% is_data_prep", "@percent_is_data_pred"), - ("Total is_data_prep", "@count_true_is_data_pred"), - ("nbr_publications", "@count_is_data_pred"), - ], - ) - - fig_code_points = hv.Points( - df, - kdims=[self.grouping_var, "percent_is_code_pred"], - ).opts( - tools=["hover"], - color="lightblue", - size=5, - hover_tooltips=[ - (self.grouping_var, f"@{self.grouping_var}"), - ("% is_code_prep", "@percent_is_code_pred"), - ("Total is_code_prep", "@count_true_is_code_pred"), - ("nbr_publications", "@count_is_data_pred"), - ], - ) - - plot = ( - fig_code_curve * fig_data_curve * fig_data_points * fig_code_points - ).opts( - title="", - xlabel=self.grouping_var, - ylabel="Percentage", - width=800, - height=400, - legend_position="top_left", - ) - # Layout the dashboard dashboard = pn.Column( "# Data and code transparency", - pn.Column(plot, sizing_mode="stretch_width"), + pn.Column( + self.get_top_bar, self.get_echart_plot, sizing_mode="stretch_width" + ), ) return dashboard