Skip to content

Commit

Permalink
handle data tags
Browse files Browse the repository at this point in the history
  • Loading branch information
pierrotsmnrd committed Aug 26, 2024
1 parent 9464619 commit ca3c09b
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 28 deletions.
6 changes: 2 additions & 4 deletions web/dashboard/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,8 @@ def load_data():

# Necessary conversion to tuples, which are hashable,
# as required for grouping.
raw_data.affiliation_country = raw_data.affiliation_country.apply(
lambda cntry: tuple(cntry)
)
raw_data.funder = raw_data.funder.apply(lambda fndrs: tuple(fndrs))
for col in ["affiliation_country", "funder", "data_tags"]:
raw_data[col] = raw_data[col].apply(lambda x: tuple(x))

return raw_data

Expand Down
100 changes: 76 additions & 24 deletions web/dashboard/main_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"journal",
"affiliation_country",
"funder",
"data_tags",
],
}
}
Expand Down Expand Up @@ -62,7 +63,7 @@ class MainDashboard(param.Parameterized):
"""

# High-level parameters.
extraction_tool = param.Selector(default="", objects=[], label="Extraction tool")
extraction_tool = param.Selector(default="", objects=[], label="Metrics group")

metrics = param.Selector(default=[], objects=[], label="Metrics")

Expand All @@ -82,6 +83,11 @@ class MainDashboard(param.Parameterized):

filter_funder = param.ListSelector(default=[], objects=[], label="Funder")

filter_tags = param.ListSelector(default=[], objects=[], label="Tags")

# Internal mechanisms
trigger_rendering = param.Integer(default=0)

# UI elements
echarts_pane = pn.pane.ECharts(
{}, height=640, width=960, renderer="svg", options={"replaceMerge": ["series"]}
Expand Down Expand Up @@ -120,6 +126,13 @@ def __init__(self, datasets, **params):
options: self.new_picker_title("funders", select_picker, values, options),
)

self.tags_select_picker = SelectPicker.from_param(
self.param.filter_tags,
update_title_callback=lambda select_picker,
values,
options: self.new_picker_title("tags", select_picker, values, options),
)

self.build_pubdate_filter()

@pn.depends("extraction_tool", watch=True)
Expand Down Expand Up @@ -166,7 +179,9 @@ def did_change_extraction_tool(self):
self.param.filter_journal.objects = self.raw_data.journal.unique()

## affiliation country
countries_with_count = self.get_countries_with_count()
countries_with_count = self.get_col_values_with_count(
"affiliation_country", lambda x: x is None
)

def country_sorter(c):
return countries_with_count[c]
Expand All @@ -176,7 +191,9 @@ def country_sorter(c):
)

## funder
funders_with_count = self.get_funders_with_count()
funders_with_count = self.get_col_values_with_count(
"funder", lambda x: len(x) == 0 or len(x) == 1 and x[0] == ""
)

def funder_sorter(c):
return funders_with_count[c]
Expand All @@ -185,33 +202,33 @@ def funder_sorter(c):
funders_with_count.keys(), key=funder_sorter, reverse=True
)

## Tags
tags_with_count = self.get_col_values_with_count(
"data_tags", lambda x: x is None
)

def tags_sorter(c):
return tags_with_count[c]

self.param.filter_tags.objects = sorted(
tags_with_count.keys(), key=tags_sorter, reverse=True
)

# This triggers the function "did_change_splitting_var",
# which updates filter_journal, filter_affiliation_country, filter_funder and filter_tags.
self.splitting_var = self.param.splitting_var.objects[0]

def get_col_values_with_count(self, col, none_test):
    """Count how often each value occurs in a list-like column of ``self.raw_data``.

    Each cell of ``self.raw_data[col]`` is expected to be an iterable of
    values (e.g. a tuple of countries, funders or tags). Cells for which
    ``none_test`` returns a truthy value are counted once under the string
    key ``"None"``; for every other cell, each contained value is counted.

    Args:
        col: Name of the column of ``self.raw_data`` to scan.
        none_test: Callable taking one cell and returning True when the
            cell should be treated as "no value".

    Returns:
        A dict mapping each value (plus possibly ``"None"``) to its number
        of occurrences, in first-seen order.
    """
    # Deliberately not wrapped in lru_cache: callers pass a freshly created
    # lambda as `none_test` on every call, so a cache keyed on the arguments
    # would never hit and would only keep `self` and old results alive.
    counts = {}
    for row in self.raw_data[col].values:
        if none_test(row):
            ## Keeping "None" as a string on purpose, to represent it in the SelectPicker
            counts["None"] = counts.get("None", 0) + 1
        else:
            for value in row:
                counts[value] = counts.get(value, 0) + 1
    return counts

@pn.depends("splitting_var", watch=True)
def did_change_splitting_var(self):
Expand All @@ -235,7 +252,9 @@ def did_change_splitting_var(self):

if self.splitting_var == "affiliation_country":
# We want to show all countries, but pre-select only the top 10
countries_with_count = self.get_countries_with_count()
countries_with_count = self.get_col_values_with_count(
"affiliation_country", lambda x: x is None
)

# pre-filter the countries because there are a lot
countries_with_count = {
Expand Down Expand Up @@ -264,7 +283,9 @@ def did_change_splitting_var(self):

if self.splitting_var == "funder":
# We want to show all funders, but pre-select only the top 10
funders_with_count = self.get_funders_with_count()
funders_with_count = self.get_col_values_with_count(
"funder", lambda x: len(x) == 0 or len(x) == 1 and x[0] == ""
)

top_5_min = sorted(
[
Expand All @@ -284,15 +305,24 @@ def did_change_splitting_var(self):
else:
selected_funders = self.param.filter_funder.objects

# There are currently only two tags, so no need to pre-select a top subset
selected_tags = self.param.filter_tags.objects

# Trigger a batch update of the filter values,
# which prevents re-rendering the dashboard several times
# and avoids intermediate states where the dashboard renders once
# with all funders, for instance, and then again restricted to the selected funders.
# Also, we increment trigger_rendering to force the update of the echarts plot.
# This is useful when switching from splitting var "None" to "data_tags", for instance.
# In this case, the selected tags don't change and the plot wouldn't update, hence the need
# for trigger_rendering.
print("TRIGGER UPDATE")
self.param.update(
filter_journal=selected_journals,
filter_affiliation_country=selected_countries,
filter_funder=selected_funders,
filter_tags=selected_tags,
trigger_rendering=self.trigger_rendering + 1,
)

if self.splitting_var == "None":
Expand Down Expand Up @@ -342,6 +372,17 @@ def funder_filter(cell):

filtered_df = filtered_df[filtered_df.funder.apply(funder_filter)]

if len(filtered_df) > 0 and len(self.filter_tags) != len(
self.param.filter_tags.objects
):
# the filter on tags is similar to the filter on countries
def tags_filter(cell):
if cell is None:
return "None" in self.filter_tags
return any(c in self.filter_tags for c in cell)

filtered_df = filtered_df[filtered_df.data_tags.apply(tags_filter)]

aggretations = {}
for field, aggs in dims_aggregations.items():
for agg in aggs:
Expand All @@ -353,6 +394,8 @@ def funder_filter(cell):

result = filtered_df.groupby(groupers).agg(**aggretations).reset_index()

print("FILTERED_GROUPED_DATA_DONE", len(result))

return result

@pn.depends(
Expand All @@ -361,6 +404,8 @@ def funder_filter(cell):
"filter_affiliation_country",
"filter_journal",
"filter_funder",
"filter_tags",
"trigger_rendering",
watch=True,
)
def updated_echart_plot(self):
Expand Down Expand Up @@ -407,6 +452,12 @@ def updated_echart_plot(self):
splitting_var_filter = self.filter_funder
splitting_var_column = "funder"
splitting_var_query = lambda cell, selected_item: selected_item in cell

elif self.splitting_var == "data_tags":
splitting_var_filter = self.filter_tags
splitting_var_column = "data_tags"
splitting_var_query = lambda cell, selected_item: selected_item in cell

else:
print("Defaulting to splitting var 'journal' ")
splitting_var_filter = self.filter_journal
Expand Down Expand Up @@ -594,6 +645,7 @@ def get_sidebar(self):
self.journal_select_picker,
self.affiliation_country_select_picker,
self.funder_select_picker,
self.tags_select_picker,
]

sidebar = pn.Column(*items)
Expand Down

0 comments on commit ca3c09b

Please sign in to comment.