From ca3c09b950d77bc5e9b13548f274d04186fa4b5e Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Simonard Date: Mon, 26 Aug 2024 15:05:31 +0200 Subject: [PATCH] handle data tags --- web/dashboard/app.py | 6 +- web/dashboard/main_dashboard.py | 100 ++++++++++++++++++++++++-------- 2 files changed, 78 insertions(+), 28 deletions(-) diff --git a/web/dashboard/app.py b/web/dashboard/app.py index 1677fe65..1e6de2cf 100644 --- a/web/dashboard/app.py +++ b/web/dashboard/app.py @@ -39,10 +39,8 @@ def load_data(): # necessary conversion to tuples, which is hashable type # needed for grouping - raw_data.affiliation_country = raw_data.affiliation_country.apply( - lambda cntry: tuple(cntry) - ) - raw_data.funder = raw_data.funder.apply(lambda fndrs: tuple(fndrs)) + for col in ["affiliation_country", "funder", "data_tags"]: + raw_data[col] = raw_data[col].apply(lambda x: tuple(x)) return raw_data diff --git a/web/dashboard/main_dashboard.py b/web/dashboard/main_dashboard.py index 0c49331d..b36afbd1 100644 --- a/web/dashboard/main_dashboard.py +++ b/web/dashboard/main_dashboard.py @@ -25,6 +25,7 @@ "journal", "affiliation_country", "funder", + "data_tags", ], } } @@ -62,7 +63,7 @@ class MainDashboard(param.Parameterized): """ # High-level parameters. 
- extraction_tool = param.Selector(default="", objects=[], label="Extraction tool") + extraction_tool = param.Selector(default="", objects=[], label="Metrics group") metrics = param.Selector(default=[], objects=[], label="Metrics") @@ -82,6 +83,11 @@ class MainDashboard(param.Parameterized): filter_funder = param.ListSelector(default=[], objects=[], label="Funder") + filter_tags = param.ListSelector(default=[], objects=[], label="Tags") + + # Internal mechanisms + trigger_rendering = param.Integer(default=0) + # UI elements echarts_pane = pn.pane.ECharts( {}, height=640, width=960, renderer="svg", options={"replaceMerge": ["series"]} @@ -120,6 +126,13 @@ def __init__(self, datasets, **params): options: self.new_picker_title("funders", select_picker, values, options), ) + self.tags_select_picker = SelectPicker.from_param( + self.param.filter_tags, + update_title_callback=lambda select_picker, + values, + options: self.new_picker_title("tags", select_picker, values, options), + ) + self.build_pubdate_filter() @pn.depends("extraction_tool", watch=True) @@ -166,7 +179,9 @@ def did_change_extraction_tool(self): self.param.filter_journal.objects = self.raw_data.journal.unique() ## affiliation country - countries_with_count = self.get_countries_with_count() + countries_with_count = self.get_col_values_with_count( + "affiliation_country", lambda x: x is None + ) def country_sorter(c): return countries_with_count[c] @@ -176,7 +191,9 @@ def country_sorter(c): ) ## funder - funders_with_count = self.get_funders_with_count() + funders_with_count = self.get_col_values_with_count( + "funder", lambda x: len(x) == 0 or len(x) == 1 and x[0] == "" + ) def funder_sorter(c): return funders_with_count[c] @@ -185,33 +202,33 @@ def funder_sorter(c): funders_with_count.keys(), key=funder_sorter, reverse=True ) + ## Tags + tags_with_count = self.get_col_values_with_count( + "data_tags", lambda x: x is None + ) + + def tags_sorter(c): + return tags_with_count[c] + + 
self.param.filter_tags.objects = sorted( + tags_with_count.keys(), key=tags_sorter, reverse=True + ) + # This triggers function "did_change_splitting_var" # which updates filter_journal, filter_affiliation_country and filter_funder self.splitting_var = self.param.splitting_var.objects[0] @lru_cache - def get_funders_with_count(self): - funders = {} - for row in self.raw_data.funder.values: - if len(row) == 0 or len(row) == 1 and row[0] == "": + def get_col_values_with_count(self, col, none_test): + values = {} + for row in self.raw_data[col].values: + if none_test(row): ## Keeping "None" as a string on purpose, to represent it in the SelectPicker - funders["None"] = funders.get("None", 0) + 1 + values["None"] = values.get("None", 0) + 1 else: for c in row: - funders[c] = funders.get(c, 0) + 1 - return funders - - @lru_cache - def get_countries_with_count(self): - countries = {} - for row in self.raw_data.affiliation_country.values: - if row is None: - ## Keeping "None" as a string on purpose, to represent it in the SelectPicker - countries["None"] = countries.get("None", 0) + 1 - else: - for c in row: - countries[c] = countries.get(c, 0) + 1 - return countries + values[c] = values.get(c, 0) + 1 + return values @pn.depends("splitting_var", watch=True) def did_change_splitting_var(self): @@ -235,7 +252,9 @@ def did_change_splitting_var(self): if self.splitting_var == "affiliation_country": # We want to show all countries, but pre-select only the top 10 - countries_with_count = self.get_countries_with_count() + countries_with_count = self.get_col_values_with_count( + "affiliation_country", lambda x: x is None + ) # pre-filter the countries because there are a lot countries_with_count = { @@ -264,7 +283,9 @@ def did_change_splitting_var(self): if self.splitting_var == "funder": # We want to show all funders, but pre-select only the top 10 - funders_with_count = self.get_funders_with_count() + funders_with_count = self.get_col_values_with_count( + "funder", lambda x: 
len(x) == 0 or len(x) == 1 and x[0] == "" + ) top_5_min = sorted( [ @@ -284,15 +305,24 @@ def did_change_splitting_var(self): else: selected_funders = self.param.filter_funder.objects + # There are currently only two tags, so no need to pre-select a top subset + selected_tags = self.param.filter_tags.objects + # Trigger a batch update of the filters value, # preventing from re-rendering the dashboard several times # and preventing intermediate states where the dashboard renders onces # with all funders for instance, and then restricting on the selected funders. + # Also, we increment the trigger_rendering to force the update of the echarts plot. + # This is useful when switching from splitting var "None" to "data_tags" for instance. + # In this case, the selected tags don't change, and the plot won't update, hence the need + # for trigger_rendering. print("TRIGGER UPDATE") self.param.update( filter_journal=selected_journals, filter_affiliation_country=selected_countries, filter_funder=selected_funders, + filter_tags=selected_tags, + trigger_rendering=self.trigger_rendering + 1, ) if self.splitting_var == "None": @@ -342,6 +372,17 @@ def funder_filter(cell): filtered_df = filtered_df[filtered_df.funder.apply(funder_filter)] + if len(filtered_df) > 0 and len(self.filter_tags) != len( + self.param.filter_tags.objects + ): + # the filter on tags is similar to the filter on countries + def tags_filter(cell): + if cell is None: + return "None" in self.filter_tags + return any(c in self.filter_tags for c in cell) + + filtered_df = filtered_df[filtered_df.data_tags.apply(tags_filter)] + aggretations = {} for field, aggs in dims_aggregations.items(): for agg in aggs: @@ -353,6 +394,8 @@ def funder_filter(cell): result = filtered_df.groupby(groupers).agg(**aggretations).reset_index() + print("FILTERED_GROUPED_DATA_DONE", len(result)) + return result @pn.depends( @@ -361,6 +404,8 @@ def funder_filter(cell): "filter_affiliation_country", "filter_journal", "filter_funder", + 
"filter_tags", + "trigger_rendering", watch=True, ) def updated_echart_plot(self): @@ -407,6 +452,12 @@ def updated_echart_plot(self): splitting_var_filter = self.filter_funder splitting_var_column = "funder" splitting_var_query = lambda cell, selected_item: selected_item in cell + + elif self.splitting_var == "data_tags": + splitting_var_filter = self.filter_tags + splitting_var_column = "data_tags" + splitting_var_query = lambda cell, selected_item: selected_item in cell + else: print("Defaulting to splitting var 'journal' ") splitting_var_filter = self.filter_journal @@ -594,6 +645,7 @@ def get_sidebar(self): self.journal_select_picker, self.affiliation_country_select_picker, self.funder_select_picker, + self.tags_select_picker, ] sidebar = pn.Column(*items)