From 5d665c84efa3378ab26ef8ff6573ad93fb9fc502 Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Simonard Date: Fri, 23 Aug 2024 10:39:35 +0200 Subject: [PATCH] WIP Dashboard : splitting variable + various --- web/dashboard/app.py | 9 ++ web/dashboard/components/select_picker.py | 19 +++- web/dashboard/main_dashboard.py | 130 ++++++++++++++++++---- 3 files changed, 136 insertions(+), 22 deletions(-) diff --git a/web/dashboard/app.py b/web/dashboard/app.py index db29529f..c83d814c 100644 --- a/web/dashboard/app.py +++ b/web/dashboard/app.py @@ -92,6 +92,15 @@ def on_load(): # Harcoded for now, will be added to the raw data later raw_data["metrics"] = "RTransparent" + # Cleanup - might be handled upstream in the future + # raw_countries = raw_data.affiliation_country.unique() + + raw_data.affiliation_country = raw_data.affiliation_country.apply( + lambda cntry: ( + tuple(set(map(str.strip, cntry.split(";")))) if cntry is not None else cntry + ) + ) + pn.state.cache["data"] = raw_data diff --git a/web/dashboard/components/select_picker.py b/web/dashboard/components/select_picker.py index 8ef0b496..08a9a461 100644 --- a/web/dashboard/components/select_picker.py +++ b/web/dashboard/components/select_picker.py @@ -183,7 +183,7 @@ def filtered_options_did_change(self): div.sp_options_list_container { - background-color: var(--bs-form-control-bg); + background-color: lightgray; box-shadow: rgba(0, 0, 0, 0.2) 0px 5px 5px -3px, rgba(0, 0, 0, 0.14) 0px 8px 10px 1px, rgba(0, 0, 0, 0.12) 0px 3px 14px 2px; @@ -255,6 +255,19 @@ def filtered_options_did_change(self): height:100%; } + .sp_filter_clear_btn { + padding-top: 4px; + padding-right: 4px; + } + + .sp_filter_clear_btn .bk-btn { + border: lightgray solid 2px; + fill: lightgray; + } + .sp_filter_clear_btn .bk-btn :hover{ + border: black solid 2px; + fill: black; + } /* .sp_options_list_container input[type="checkbox"].intermediary::after { content: "-"; @@ -330,8 +343,8 @@ def filtered_options_did_change(self): """, 
"input_change": """ - console.log("input_change", data, model, state, view); - console.log(model.checkboxes_list); + /*console.log("input_change", data, model, state, view); + console.log(model.checkboxes_list);*/ let new_value = []; model.checkboxes_list.forEach((cb, idx) => { diff --git a/web/dashboard/main_dashboard.py b/web/dashboard/main_dashboard.py index 49916cb7..7a097180 100644 --- a/web/dashboard/main_dashboard.py +++ b/web/dashboard/main_dashboard.py @@ -74,6 +74,10 @@ class MainDashboard(param.Parameterized): filter_journal = param.ListSelector(default=[], objects=[], label="Journal") + filter_affiliation_country = param.ListSelector( + default=[], objects=[], label="Country" + ) + def __init__(self, datasets, **params): super().__init__(**params) @@ -91,6 +95,15 @@ def __init__(self, datasets, **params): options: self.new_picker_title("journals", select_picker, values, options), ) + self.affiliation_country_select_picker = SelectPicker.from_param( + self.param.filter_affiliation_country, + update_title_callback=lambda select_picker, + values, + options: self.new_picker_title( + "affiliation countries", select_picker, values, options + ), + ) + @pn.depends("extraction_tool", watch=True) def did_change_extraction_tool(self): print("DID_CHANGE_EXTRACTION_TOOL") @@ -132,14 +145,54 @@ def did_change_extraction_tool(self): ) self.filter_pubdate = (self.raw_data.year.min(), self.raw_data.year.max()) - # ## filter_journal + ## filter_journal self.param.filter_journal.objects = self.raw_data.journal.unique() self.filter_journal = list(self.raw_data.journal.value_counts().iloc[:10].index) + ## affiliation country + ## Keeping "None" as a string on purpose, to represent it in the SelectPicker + countries_with_count = {"None": 0} + for row in self.raw_data.affiliation_country.values: + if row is None: + countries_with_count["None"] += 1 + continue + for c in row: + if c not in countries_with_count: + countries_with_count[c] = 1 + else: + countries_with_count[c] += 
1 + + ## We want to show all countries, but pre-select only the top 10 + countries_with_count = { + country: count + for country, count in countries_with_count.items() + if count > 10 + } + + top_10_min = sorted( + [count for _, count in countries_with_count.items()], reverse=True + )[10] + selected_countries = [ + country + for country, count in countries_with_count.items() + if count >= top_10_min + ] + + def country_sorter(c): + return countries_with_count[c] + + self.param.filter_affiliation_country.objects = sorted( + countries_with_count.keys(), key=country_sorter, reverse=True + ) + self.filter_affiliation_country = selected_countries + def filtered_grouped_data(self): + print("FILTERED_GROUPED_DATA") + filters = [] filters.append(f"journal in {self.filter_journal}") + # filters.append(f"affiliation_country ") if self.filter_pubdate is not None: filters.append(f"year >= {self.filter_pubdate[0]}") @@ -149,6 +202,15 @@ def filtered_grouped_data(self): self.raw_data.query(" and ".join(filters)) if filters else self.raw_data ) + # the filter on countries is a bit different as the rows + # are list of countries + def country_filter(cell): + if cell is None: + return "None" in self.filter_affiliation_country + return any(c in self.filter_affiliation_country for c in cell) + + filtered_df = filtered_df[filtered_df.affiliation_country.apply(country_filter)] + aggretations = {} for field, aggs in dims_aggregations.items(): for agg in aggs: @@ -175,7 +237,7 @@ def get_echart_plot(self): raw_metric = metrics_by_title[self.metrics] - xAxis = df["year"].tolist() + xAxis = df["year"].unique().tolist() if self.splitting_var == "None": series = [ @@ -193,21 +255,45 @@ def get_echart_plot(self): series = [] legend_data = [] - # TODO : handle other splitting_var - if self.splitting_var == "journal": - for journal in sorted(self.filter_journal): - journal_df = df.query(f"journal == '{journal}'") - series.append( - { - "id": journal, - "name": journal, - "type": "line", - 
"data": journal_df[raw_metric].tolist(), - } - ) - legend_data.append( - {"name": journal, "icon": "path://M 0 0 H 20 V 20 H 0 Z"} - ) + if self.splitting_var == "affiliation_country": + splitting_var_filter = self.filter_affiliation_country + splitting_var_column = "affiliation_country" + splitting_var_query = lambda cell, selected_item: selected_item in cell + + elif self.splitting_var == "fund_pmc_institute": + splitting_var_filter = self.filter_fund_pmc_institute + splitting_var_column = "fund_pmc_institute" + splitting_var_query = lambda cell, selected_item: cell == selected_item + else: + print("Defaulting to splitting var 'journal' ") + splitting_var_filter = self.filter_journal + splitting_var_column = "journal" + splitting_var_query = lambda cell, selected_item: cell == selected_item + + for selected_item in sorted(splitting_var_filter): + # sub_df = df.query(f"{splitting_var_column} == '{selected_item}'") + sub_df = ( + df[ + df[splitting_var_column].apply( + lambda x: splitting_var_query(x, selected_item) + ) + ] + .groupby("year") + .agg({raw_metric: "mean"}) # todo fix this + .reset_index() + ) + + series.append( + { + "id": selected_item, + "name": selected_item, + "type": "line", + "data": sub_df[raw_metric].tolist(), + } + ) + legend_data.append( + {"name": selected_item, "icon": "path://M 0 0 H 20 V 20 H 0 Z"} + ) title = f"{self.metrics} by {self.splitting_var} ({int(self.filter_pubdate[0])}-{int(self.filter_pubdate[1])})" @@ -232,7 +318,7 @@ def get_echart_plot(self): }, "xAxis": { "data": xAxis, - "name": self.splitting_var, + "name": "year", "nameLocation": "center", "nameGap": 30, }, @@ -345,6 +431,7 @@ def get_sidebar(self): self.get_pubdate_filter(), pn.layout.Divider(), self.journal_select_picker, + self.affiliation_country_select_picker, ] sidebar = pn.Column(*items) @@ -361,7 +448,12 @@ def get_top_bar(self): pn.widgets.Select.from_param(self.param.splitting_var), ) - @pn.depends("extraction_tool", "filter_journal", "splitting_var") + 
@pn.depends( + "extraction_tool", + "filter_journal", + "filter_affiliation_country", + "splitting_var", + ) def get_dashboard(self): print("GET_DASHBOARD")