From fc9510d07cc654c467f0a73b0dbf091da8017119 Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Simonard Date: Thu, 22 Aug 2024 08:53:40 +0200 Subject: [PATCH 1/2] WIP dashboard : year shortcuts, widgets, comments --- web/dashboard/main_dashboard.py | 149 +++++++++++++++++++++++++------- 1 file changed, 119 insertions(+), 30 deletions(-) diff --git a/web/dashboard/main_dashboard.py b/web/dashboard/main_dashboard.py index a2f41ee8..97acc8d3 100644 --- a/web/dashboard/main_dashboard.py +++ b/web/dashboard/main_dashboard.py @@ -1,3 +1,5 @@ +from datetime import datetime + import pandas as pd import panel as pn import param @@ -15,18 +17,28 @@ groups = {"year": "int"} - -datasets_metrics = { +extraction_tools_metrics = { "RTransparent": ["is_data_pred", "is_code_pred", "score", "eigenfactor_score"] } dims_aggregations = { - "is_data_pred": ["percent", "count_true", "count"], + "is_data_pred": ["percent", "count_true"], "is_code_pred": ["percent", "count_true"], "score": ["mean"], "eigenfactor_score": ["mean"], } +metrics_titles = { + "percent_is_data_pred": "Data Sharing (%)", + "percent_is_code_pred": "Code Sharing (%)", + "count_true_is_data_pred": "Data Sharing", + "count_true_is_code_pred": "Code Sharing", + "mean_score": "Mean Score", + "mean_eigenfactor_score": "Mean Eigenfactor Score", +} + +metrics_by_title = {v: k for k, v in metrics_titles.items()} + aggregation_formulas = { "percent": lambda x: x.mean() * 100, @@ -42,9 +54,9 @@ class MainDashboard(param.Parameterized): """ # High-level parameters. - dataset = param.Selector(default="", objects=[], label="Dataset") + extraction_tool = param.Selector(default="", objects=[], label="Extraction tool") - metrics = param.ListSelector(default=[], objects=[], label="Metrics") + metrics = param.Selector(default=[], objects=[], label="Metrics") splitting_var = param.Selector( default="year", @@ -53,7 +65,9 @@ class MainDashboard(param.Parameterized): ) # Filters - filter_pubdate = param.Range(step=1, label="Publication date") + filter_pubdate = param.Range( # (2000, 2024), bounds=(2000, 2024), + step=1, label="Publication date" + ) filter_journal = param.Selector( default="All journals (including empty)", @@ -73,24 +87,45 @@ def __init__(self, datasets, **params): # By default, take the first dataset. # Currently, there's only RTransparent - self.param.dataset.objects = list(self.datasets.keys()) - self.dataset = self.param.dataset.objects[0] + self.param.extraction_tool.objects = list(self.datasets.keys()) + self.extraction_tool = self.param.extraction_tool.objects[0] + + @pn.depends("extraction_tool", watch=True) + def did_change_extraction_tool(self): + print("DID_CHANGE_EXTRACTION_TOOL") + + new_extraction_tools_metrics_metrics = extraction_tools_metrics[ + self.extraction_tool + ] + + new_metrics = [] + for m in new_extraction_tools_metrics_metrics: + for agg in dims_aggregations[m]: + new_metrics.append(metrics_titles[f"{agg}_{m}"]) + + self.param.metrics.objects = new_metrics + self.metrics = self.param.metrics.objects[0] + + # self.param.metrics.objects = extraction_tools_metrics[self.extraction_tool] + # self.metrics = self.param.metrics.objects[0] - @pn.depends("dataset", watch=True) - def did_change_dataset(self): - self.metrics = datasets_metrics[self.dataset] - self.raw_data = self.datasets[self.dataset] + self.raw_data = self.datasets[self.extraction_tool] + print(self.raw_data) + # breakpoint() # Hardcoded for RTransparent for the moment, update to more generic later self.param.filter_pubdate.bounds = ( self.raw_data.year.min(), - self.raw_data.year.max(), + # self.raw_data.year.max(), + # Use current year instead, so the "Past X years" buttons work + datetime.now().year, ) self.param.filter_pubdate.default = ( self.raw_data.year.min(), self.raw_data.year.max(), ) + self.filter_pubdate = (self.raw_data.year.min(), self.raw_data.year.max()) self.param.filter_selected_journals.objects = self.raw_data.journal.unique() # As default, takes the journals with the biggest number of occurences @@ -125,24 +160,34 @@ def filtered_grouped_data(self): return result - @pn.depends("dataset", "splitting_var", "filter_pubdate") + @pn.depends("extraction_tool", "splitting_var", "filter_pubdate", "metrics") def get_echart_plot(self): + print("GET_ECHART_PLOT") + + if self.filter_pubdate is None: + # The filters are not yet initialized + # Let's return an empty plot + return pn.pane.ECharts({}, height=640, width=840, renderer="svg") + df = self.filtered_grouped_data() + raw_metric = metrics_by_title[self.metrics] + xAxis = df[self.splitting_var].tolist() series = [ { - "id": serie, - "name": serie, + "id": self.metrics, + "name": self.metrics, "type": "line", - "data": df[serie].tolist(), + "data": df[raw_metric].tolist(), } - for serie in ["percent_is_data_pred", "percent_is_code_pred"] ] + title = f"{self.metrics} by {self.splitting_var} ({int(self.filter_pubdate[0])}-{int(self.filter_pubdate[1])})" + echarts_config = { "title": { - "text": "Percentage of Publications Following Open Science Practices Over Time", + "text": title, }, "tooltip": { "show": True, @@ -152,8 +197,9 @@ def get_echart_plot(self): # {{a1}} : {{c1}} """, }, "legend": { - #'data':['Sales'] - "data": ["is_data_pred", "is_code_pred"], + "data": [ + {"name": self.metrics, "icon": "path://M 0 0 H 20 V 20 H 0 Z"}, + ], "orient": "vertical", "right": 10, "top": 20, @@ -183,6 +229,12 @@ def get_echart_plot(self): @pn.depends("filter_pubdate.bounds") def get_pubdate_filter(self): + print("GET_PUBDATE_FILTER") + + # It's the slider that controls the filter_pubdate param + pubdate_slider = pn.widgets.RangeSlider.from_param(self.param.filter_pubdate) + + # The text inputs only reflect and update the value of the slider's bounds start_pubdate_input = pn.widgets.TextInput( value=str(int(self.param.filter_pubdate.bounds[0])), width=80 ) @@ -190,14 +242,15 @@ def get_pubdate_filter(self): value=str(int(self.param.filter_pubdate.bounds[1])), width=80 ) - pubdate_slider = pn.widgets.RangeSlider.from_param(self.param.filter_pubdate) - + # When the slider's value change, update the TextInputs def update_pubdate_text_inputs(event): start_pubdate_input.value = str(pubdate_slider.value[0]) end_pubdate_input.value = str(pubdate_slider.value[1]) pubdate_slider.param.watch(update_pubdate_text_inputs, "value") + # When the TextInputs' value change, update the slider, + # which updated the filter_pubdate param def update_pubdate_slider(event): pubdate_slider.value = ( int(start_pubdate_input.value or self.param.filter_pubdate.bounds[0]), @@ -207,10 +260,42 @@ def update_pubdate_slider(event): start_pubdate_input.param.watch(update_pubdate_slider, "value") end_pubdate_input.param.watch(update_pubdate_slider, "value") - return pn.Column(pn.Row(start_pubdate_input, end_pubdate_input), pubdate_slider) + last_year_button = pn.widgets.Button( + name="Last year", width=80, button_type="light", button_style="solid" + ) + past_5years_button = pn.widgets.Button( + name="Past 5 years", width=80, button_type="light", button_style="solid" + ) + past_10years_button = pn.widgets.Button( + name="Past 10 years", width=80, button_type="light", button_style="solid" + ) + + def did_click_shortcut_button(event): + print(event) + if event.obj.name == "Last year": + pubdate_slider.value = (datetime.now().year, datetime.now().year) + elif event.obj.name == "Past 5 years": + pubdate_slider.value = (datetime.now().year - 5, datetime.now().year) + elif event.obj.name == "Past 10 years": + pubdate_slider.value = (datetime.now().year - 10, datetime.now().year) + + last_year_button.on_click(did_click_shortcut_button) + past_5years_button.on_click(did_click_shortcut_button) + past_10years_button.on_click(did_click_shortcut_button) + pubdate_shortcuts = pn.Row( + last_year_button, past_5years_button, past_10years_button + ) + + return pn.Column( + pn.Row(start_pubdate_input, end_pubdate_input), + pubdate_slider, + pubdate_shortcuts, + ) - @pn.depends("dataset", "filter_journal") + @pn.depends("extraction_tool", "filter_journal") def get_sidebar(self): + print("GET_SIDEBAR") + items = [ pn.pane.Markdown("## Filters"), pn.pane.Markdown("### Applied Filters"), @@ -218,7 +303,7 @@ def get_sidebar(self): pn.layout.Divider(), pn.pane.Markdown("### Publication Details"), # pn.pane.Markdown("#### Publication Date"), - self.get_pubdate_filter, + self.get_pubdate_filter(), pn.layout.Divider(), pn.widgets.Select.from_param(self.param.filter_journal), ] @@ -234,18 +319,22 @@ def get_sidebar(self): return sidebar - @pn.depends("dataset") + @pn.depends("extraction_tool") def get_top_bar(self): + print("GET_TOP_BAR") + return pn.Row( - pn.widgets.Select.from_param(self.param.dataset), - pn.widgets.CheckBoxGroup.from_param(self.param.metrics), + pn.widgets.Select.from_param(self.param.extraction_tool), + pn.widgets.Select.from_param(self.param.metrics), pn.widgets.Select.from_param(self.param.splitting_var), ) @pn.depends( - "dataset", "filter_journal", "filter_selected_journals", "splitting_var" + "extraction_tool", "filter_journal", "filter_selected_journals", "splitting_var" ) def get_dashboard(self): + print("GET_DASHBOARD") + # Layout the dashboard dashboard = pn.Column( "# Data and code transparency", From 7b8fb6bfefb3e834495bf59339c970ba6643dbda Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Simonard Date: Thu, 22 Aug 2024 14:33:16 +0200 Subject: [PATCH 2/2] WIP Dashboard : select picker, splitting vars --- web/dashboard/components/select_picker.py | 480 ++++++++++++++++++++++ web/dashboard/main_dashboard.py | 156 ++++--- 2 files changed, 573 insertions(+), 63 deletions(-) create mode 100644 web/dashboard/components/select_picker.py diff --git a/web/dashboard/components/select_picker.py b/web/dashboard/components/select_picker.py new file mode 100644 index 00000000..8ef0b496 --- /dev/null +++ b/web/dashboard/components/select_picker.py @@ -0,0 +1,480 @@ +import param +from panel.reactive import ReactiveHTML +from panel.widgets import Widget + + +class SelectPicker(ReactiveHTML, Widget): + title = param.String(default="") + + options = param.List(doc="List of possible values to be selected", default=[]) + filtered_options = param.List( + doc="List of possible values to be selected", default=[] + ) + value = param.List(doc="The actual list of selected values", default=[]) + + filter_str = param.String(default="") + + update_title_callback = None + + _child_config = {"options": "model"} + + def __init__(self, **params): + super().__init__(**params) + + @classmethod + def from_param(cls, parameter: param.Parameter, update_title_callback, **params): + result = super().from_param(parameter, **params) + result.update_title_callback = update_title_callback + result.value_did_change() + return result + + def update_filtereted_options(self): + self.filtered_options = [ + opt + for opt in self.options + if isinstance(opt, str) and self.filter_str.lower() in opt.lower() + ] + + @param.depends("options", watch=True, on_init=True) + def options_did_change(self): + # print("options_did_change", self.options) + self.value = [v for v in self.value if v in self.options] + self.update_filtereted_options() + + @param.depends("value", watch=True, on_init=True) + def value_did_change(self): + # print("value_did_change", self.value) + if self.update_title_callback is not None: + self.title = self.update_title_callback(self, self.value, self.options) + + @param.depends("filter_str", watch=True, on_init=True) + def filter_str_did_change(self): + # print("filter_str_did_change", self.filter_str) + self.update_filtereted_options() + + @param.depends("filtered_options", watch=True, on_init=True) + def filtered_options_did_change(self): + # print("filtered_options_did_change", self.filtered_options) + pass + + _checkbox_group_css = """ + + """ + + _style = """ + + + """ + + _template = ( + _style + + _checkbox_group_css + + """ + +
+ +
+

${title}

+
+ + +
+ """ + ) + + _scripts = { + "after_layout": """ + /*console.log("after_layout");*/ + + """, + "input_change": """ + + console.log("input_change", data, model, state, view); + console.log(model.checkboxes_list); + + let new_value = []; + model.checkboxes_list.forEach((cb, idx) => { + if (cb.checked) { + new_value.push(cb.value); + } + }); + data.value = new_value; + + setTimeout(function() { + self.update_select_all_checkbox() + }, 100); + + """, + "update_select_all_checkbox": """ + + /* console.log("update_select_all_checkbox", data.value.length , data.options.length); */ + + if ( data.value.length == data.options.length) { + select_all_cb.checked = true; + select_all_cb.classList.remove("intermediary"); + } else if ( data.value.length == 0 ) { + select_all_cb.checked = false; + select_all_cb.classList.remove("intermediary"); + } else { + select_all_cb.classList.add("intermediary"); + } + + + """, + "filter_text_input_did_change": """ + + /* console.log("filter_text_input_did_change", filter_text_input.value); */ + data.filter_str = filter_text_input.value; + + """, + "clear_filter": """ + /* console.log("clear filter"); */ + filter_text_input.value = ""; + self.filter_text_input_did_change(); + """, + "did_click_select_all": """ + + /* console.log(select_all_cb, select_all_cb.checked); */ + model.checkboxes_list.forEach((cb, idx) => { + cb.checked = select_all_cb.checked; + }); + self.input_change(); + + """, + "filtered_options": """ + self.rebuild_checkboxes(); + """, + "rebuild_checkboxes": """ + + if ( typeof data.filtered_options === "undefined") { + /* console.log("rebuild_checkboxes but undefined", data.filtered_options, model.checkboxes_list) */ + return + } + + + /* console.log("rebuild_checkboxes", data.filtered_options, data.value);*/ + new_checkboxes_list = []; + + checkboxes_container.innerHTML = ""; + + data.filtered_options.forEach((opt, idx) => { + let cb = document.createElement("input"); + cb.type = "checkbox"; + cb.id = `cb${idx}`; + cb.name = `cb${idx}`; + cb.value = opt; + cb.checked = true ? data.value.includes(opt) : false; + cb.onchange = self.input_change; + + let lbl = document.createElement("label"); + lbl.htmlFor = `cb${idx}`; + + let lblspan = document.createElement("span"); + lblspan.innerHTML = opt; + + lbl.appendChild(cb); + lbl.appendChild(lblspan); + + checkboxes_container.appendChild(lbl); + /* checkboxes_container.appendChild(document.createElement("br")); + */ + + + new_checkboxes_list.push(cb); + }); + + model.checkboxes_list = new_checkboxes_list; + + """, + "render": """ + console.log("render"); + /* + console.log("data", data); + console.log("model", model); + console.log("state", state); + console.log("view", view); + console.log("checkboxes_container", checkboxes_container); + console.log("sp_options_list_container", sp_options_list_container); + */ + self.rebuild_checkboxes(); + self.update_select_all_checkbox() + + var isPointerEventInsideElement = function (event, element) { + var pos = { + x: event.targetTouches ? event.targetTouches[0].pageX : event.pageX, + y: event.targetTouches ? event.targetTouches[0].pageY : event.pageY + }; + var rect = element.getBoundingClientRect(); + return pos.x < rect.right && pos.x > rect.left && pos.y < rect.bottom && pos.y > rect.top; + }; + + + function hideOnClickOutside() { + + const outsideClickListener = event => { + + if ( ! isPointerEventInsideElement(event, sp_options_list_container) + && ! isPointerEventInsideElement(event, sp_container) + && ! isPointerEventInsideElement(event, sp_header) + && sp_options_list_container.style.display != 'none') { + + sp_options_list_container.style.display = 'none'; + } + } + + document.addEventListener('click', outsideClickListener); + } + + hideOnClickOutside(); + + """, + "toggle_list": """ + if (sp_options_list_container.style.display == '') { + sp_options_list_container.style.display = 'none'; + } else { + sp_options_list_container.style.display = ''; + } + """, + "remove": """ console.log("remove", state, view); """, + } diff --git a/web/dashboard/main_dashboard.py b/web/dashboard/main_dashboard.py index 97acc8d3..49916cb7 100644 --- a/web/dashboard/main_dashboard.py +++ b/web/dashboard/main_dashboard.py @@ -3,22 +3,25 @@ import pandas as pd import panel as pn import param +from components.select_picker import SelectPicker pn.extension("echarts") - pd.options.display.max_columns = None -# filters = { -# "journal" : "category", -# "metrics" : "select", -# } - groups = {"year": "int"} -extraction_tools_metrics = { - "RTransparent": ["is_data_pred", "is_code_pred", "score", "eigenfactor_score"] +extraction_tools_params = { + "RTransparent": { + "metrics": ["is_data_pred", "is_code_pred", "score", "eigenfactor_score"], + "splitting_vars": [ + "None", + "journal", + "affiliation_country", + "fund_pmc_institute", + ], + } } dims_aggregations = { @@ -69,16 +72,7 @@ class MainDashboard(param.Parameterized): step=1, label="Publication date" ) - filter_journal = param.Selector( - default="All journals (including empty)", - objects=[ - "All journals (including empty)", - "All journals (excluding empty values)", - "Only selected journals", - ], - label="Journal", - ) - filter_selected_journals = param.ListSelector(default=[], objects=[], label="") + filter_journal = param.ListSelector(default=[], objects=[], label="Journal") def __init__(self, datasets, **params): super().__init__(**params) @@ -90,31 +84,42 @@ def __init__(self, datasets, **params): self.param.extraction_tool.objects = list(self.datasets.keys()) self.extraction_tool = self.param.extraction_tool.objects[0] + self.journal_select_picker = SelectPicker.from_param( + self.param.filter_journal, + update_title_callback=lambda select_picker, + values, + options: self.new_picker_title("journals", select_picker, values, options), + ) + @pn.depends("extraction_tool", watch=True) def did_change_extraction_tool(self): print("DID_CHANGE_EXTRACTION_TOOL") - new_extraction_tools_metrics_metrics = extraction_tools_metrics[ - self.extraction_tool + # Updated the metrics param + new_extraction_tools_metrics = extraction_tools_params[self.extraction_tool][ + "metrics" ] new_metrics = [] - for m in new_extraction_tools_metrics_metrics: + for m in new_extraction_tools_metrics: for agg in dims_aggregations[m]: new_metrics.append(metrics_titles[f"{agg}_{m}"]) self.param.metrics.objects = new_metrics self.metrics = self.param.metrics.objects[0] - # self.param.metrics.objects = extraction_tools_metrics[self.extraction_tool] - # self.metrics = self.param.metrics.objects[0] + # Update the splitting_var param + new_extraction_tools_splitting_vars = extraction_tools_params[ + self.extraction_tool + ]["splitting_vars"] + self.param.splitting_var.objects = new_extraction_tools_splitting_vars + self.splitting_var = self.param.splitting_var.objects[0] + # Update the raw data self.raw_data = self.datasets[self.extraction_tool] - print(self.raw_data) - # breakpoint() - - # Hardcoded for RTransparent for the moment, update to more generic later + # Update the filters + ## filter_pubdate self.param.filter_pubdate.bounds = ( self.raw_data.year.min(), # self.raw_data.year.max(), @@ -127,19 +132,14 @@ def did_change_extraction_tool(self): ) self.filter_pubdate = (self.raw_data.year.min(), self.raw_data.year.max()) - self.param.filter_selected_journals.objects = self.raw_data.journal.unique() - # As default, takes the journals with the biggest number of occurences - self.filter_selected_journals = list( - self.raw_data.journal.value_counts().iloc[:10].index - ) + # ## filter_journal + self.param.filter_journal.objects = self.raw_data.journal.unique() + self.filter_journal = list(self.raw_data.journal.value_counts().iloc[:10].index) def filtered_grouped_data(self): filters = [] - if self.filter_journal == "All journals (excluding empty values)": - filters.append(("journal.notnull()")) - elif self.filter_journal == "Only selected journals": - filters.append(f"journal in {self.filter_selected_journals}") + filters.append(f"journal in {self.filter_journal}") if self.filter_pubdate is not None: filters.append(f"year >= {self.filter_pubdate[0]}") @@ -154,9 +154,11 @@ def filtered_grouped_data(self): for agg in aggs: aggretations[f"{agg}_{field}"] = (field, aggregation_formulas[agg]) - result = ( - filtered_df.groupby(self.splitting_var).agg(**aggretations).reset_index() - ) + groupers = ["year"] + if self.splitting_var != "None": + groupers.append(self.splitting_var) + + result = filtered_df.groupby(groupers).agg(**aggretations).reset_index() return result @@ -173,15 +175,39 @@ def get_echart_plot(self): raw_metric = metrics_by_title[self.metrics] - xAxis = df[self.splitting_var].tolist() - series = [ - { - "id": self.metrics, - "name": self.metrics, - "type": "line", - "data": df[raw_metric].tolist(), - } - ] + xAxis = df["year"].tolist() + + if self.splitting_var == "None": + series = [ + { + "id": self.metrics, + "name": self.metrics, + "type": "line", + "data": df[raw_metric].tolist(), + } + ] + legend_data = [ + {"name": self.metrics, "icon": "path://M 0 0 H 20 V 20 H 0 Z"}, + ] + else: + series = [] + legend_data = [] + + # TODO : handle other splitting_var + if self.splitting_var == "journal": + for journal in sorted(self.filter_journal): + journal_df = df.query(f"journal == '{journal}'") + series.append( + { + "id": journal, + "name": journal, + "type": "line", + "data": journal_df[raw_metric].tolist(), + } + ) + legend_data.append( + {"name": journal, "icon": "path://M 0 0 H 20 V 20 H 0 Z"} + ) title = f"{self.metrics} by {self.splitting_var} ({int(self.filter_pubdate[0])}-{int(self.filter_pubdate[1])})" @@ -197,9 +223,7 @@ def get_echart_plot(self): # {{a1}} : {{c1}} """, }, "legend": { - "data": [ - {"name": self.metrics, "icon": "path://M 0 0 H 20 V 20 H 0 Z"}, - ], + "data": legend_data, "orient": "vertical", "right": 10, "top": 20, @@ -292,7 +316,22 @@ def did_click_shortcut_button(event): pubdate_shortcuts, ) - @pn.depends("extraction_tool", "filter_journal") + def new_picker_title(self, entity, picker, values, options): + value_count = len(picker.value) + options_count = len(picker.options) + + if value_count == options_count: + title = f"All {entity} ({ value_count })" + + elif value_count == 0: + title = f"No {entity} (0 out of { options_count })" + + else: + title = f"{ value_count } {entity} out of { options_count }" + + return title + + @pn.depends("extraction_tool") def get_sidebar(self): print("GET_SIDEBAR") @@ -305,16 +344,9 @@ def get_sidebar(self): # pn.pane.Markdown("#### Publication Date"), self.get_pubdate_filter(), pn.layout.Divider(), - pn.widgets.Select.from_param(self.param.filter_journal), + self.journal_select_picker, ] - if self.filter_journal == "Only selected journals": - items.append( - pn.widgets.MultiChoice.from_param( - self.param.filter_selected_journals, max_items=10 - ) - ) - sidebar = pn.Column(*items) return sidebar @@ -329,9 +361,7 @@ def get_top_bar(self): pn.widgets.Select.from_param(self.param.splitting_var), ) - @pn.depends( - "extraction_tool", "filter_journal", "filter_selected_journals", "splitting_var" - ) + @pn.depends("extraction_tool", "filter_journal", "splitting_var") def get_dashboard(self): print("GET_DASHBOARD")