Skip to content

Commit

Permalink
Merge pull request #47 from nimh-dsst/dashboard
Browse files Browse the repository at this point in the history
new pass of improvements in the dashboard
  • Loading branch information
leej3 authored Aug 27, 2024
2 parents 1c62f2d + 378f401 commit 866fb7d
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 28 deletions.
1 change: 0 additions & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,3 @@

# Enable binary delta compression for PNG files
*.png -delta

5 changes: 0 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
repos:
- repo: https://github.com/psf/black
rev: 24.4.2
hooks:
- id: black

- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.5.0
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ keywords = [
dynamic = ["version"]
dependencies = [
"dill",
"colorcet",
"pandas",
"pyarrow",
"pydantic",
Expand Down
7 changes: 6 additions & 1 deletion web/dashboard/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,14 @@ def load_data():

# necessary conversion to tuples, which are a hashable type,
# as required for grouping
for col in ["affiliation_country", "funder", "data_tags"]:
for col in ["funder", "data_tags"]:
raw_data[col] = raw_data[col].apply(lambda x: tuple(x))

# convert to tuple, remove duplicates, and strip leading and trailing spaces from country names
raw_data["affiliation_country"] = raw_data["affiliation_country"].apply(
lambda x: tuple(set([v.strip() for v in x]))
)

return raw_data


Expand Down
74 changes: 53 additions & 21 deletions web/dashboard/main_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,22 @@
"funder",
"data_tags",
],
"labels": {
"None": "None",
"journal": "Journal",
"affiliation_country": "Country",
"funder": "Funder",
"data_tags": "Tags",
},
}
}

dims_aggregations = {
"is_open_data": ["percent", "count_true"],
"is_open_code": ["percent", "count_true"],
"is_open_data": ["percent", "count_true", "count"],
"is_open_code": [
"percent",
"count_true",
],
# "score": ["mean"],
# "eigenfactor_score": ["mean"],
}
Expand All @@ -47,6 +57,7 @@
"percent_is_open_code": "Code Sharing (%)",
"count_true_is_open_data": "Data Sharing",
"count_true_is_open_code": "Code Sharing",
"count_is_open_data": "Total number of publications",
"mean_score": "Mean Score",
"mean_eigenfactor_score": "Mean Eigenfactor Score",
}
Expand Down Expand Up @@ -156,6 +167,16 @@ def __init__(self, datasets, **params):
# DEBUG
self.echarts_update_button.on_click(self.did_click_update_echart_plot)

def splitting_var_label(self, splitting_var):
    """Return the display label for ``splitting_var``.

    The label is read from the ``"labels"`` mapping that
    ``extraction_tools_params`` holds for the currently selected
    ``self.extraction_tool``.
    """
    tool_params = extraction_tools_params[self.extraction_tool]
    label_by_var = tool_params["labels"]
    return label_by_var[splitting_var]

def splitting_var_from_label(self, label):
    """Return the splitting-variable key whose display label is ``label``.

    This is the reverse lookup of the ``"labels"`` mapping that
    ``extraction_tools_params`` holds for the currently selected
    ``self.extraction_tool``. When several keys share the label, the
    first one in the mapping's iteration order is returned.

    Raises:
        IndexError: if no key carries the requested label.
    """
    label_by_var = extraction_tools_params[self.extraction_tool]["labels"]
    # Collect every match first; indexing the (possibly empty) list keeps
    # the IndexError behaviour for unknown labels.
    matching_vars = [var for var, text in label_by_var.items() if text == label]
    return matching_vars[0]

@pn.depends("extraction_tool", watch=True)
def did_change_extraction_tool(self):
print("DID_CHANGE_EXTRACTION_TOOL")
Expand All @@ -180,7 +201,9 @@ def did_change_extraction_tool(self):
new_extraction_tools_splitting_vars = extraction_tools_params[
self.extraction_tool
]["splitting_vars"]
self.param.splitting_var.objects = new_extraction_tools_splitting_vars
self.param.splitting_var.objects = [
self.splitting_var_label(v) for v in new_extraction_tools_splitting_vars
]

# Update the filters
## filter_pubdate
Expand All @@ -201,7 +224,7 @@ def did_change_extraction_tool(self):

## affiliation country
countries_with_count = self.get_col_values_with_count(
"affiliation_country", lambda x: x is None
"affiliation_country", lambda x: len(x) == 0 or len(x) == 1 and x[0] == ""
)

def country_sorter(c):
Expand All @@ -225,7 +248,7 @@ def funder_sorter(c):

## Tags
tags_with_count = self.get_col_values_with_count(
"data_tags", lambda x: x is None
"data_tags", lambda x: len(x) == 0 or len(x) == 1 and x[0] == ""
)

def tags_sorter(c):
Expand Down Expand Up @@ -258,8 +281,10 @@ def did_change_splitting_var(self):
# already set the echarts pane as loading for a better UX
self.echarts_pane.loading = True

splitting_var = self.splitting_var_from_label(self.splitting_var)

notif_msg = None
if self.splitting_var == "journal":
if splitting_var == "journal":
# We want to show all journals, but pre-select only the top 10
selected_journals = list(
self.raw_data.query("journal != 'None'")
Expand All @@ -271,10 +296,11 @@ def did_change_splitting_var(self):
else:
selected_journals = self.param.filter_journal.objects

if self.splitting_var == "affiliation_country":
if splitting_var == "affiliation_country":
# We want to show all countries, but pre-select only the top 10
countries_with_count = self.get_col_values_with_count(
"affiliation_country", lambda x: x is None
"affiliation_country",
lambda x: len(x) == 0 or len(x) == 1 and x[0] == "",
)

# pre-filter the countries because there are a lot
Expand Down Expand Up @@ -302,7 +328,7 @@ def did_change_splitting_var(self):
else:
selected_countries = self.param.filter_affiliation_country.objects

if self.splitting_var == "funder":
if splitting_var == "funder":
# We want to show all funders, but pre-select only the top 10
funders_with_count = self.get_col_values_with_count(
"funder", lambda x: len(x) == 0 or len(x) == 1 and x[0] == ""
Expand Down Expand Up @@ -346,7 +372,7 @@ def did_change_splitting_var(self):
trigger_rendering=self.trigger_rendering + 1,
)

if self.splitting_var == "None":
if splitting_var == "None":
notif_msg = "No more splitting. Filters reset to default"

if notif_msg is not None:
Expand Down Expand Up @@ -411,7 +437,7 @@ def tags_filter(cell):

groupers = ["year"]
if self.splitting_var != "None":
groupers.append(self.splitting_var)
groupers.append(self.splitting_var_from_label(self.splitting_var))

result = filtered_df.groupby(groupers).agg(**aggretations).reset_index()

Expand Down Expand Up @@ -465,26 +491,28 @@ def updated_echart_plot(self):
series = []
legend_data = []

if self.splitting_var == "affiliation_country":
splitting_var = self.splitting_var_from_label(self.splitting_var)

if splitting_var == "affiliation_country":
splitting_var_filter = self.filter_affiliation_country
splitting_var_column = "affiliation_country"
splitting_var_query = lambda cell, selected_item: selected_item in cell
splitting_var_query = lambda cell, selected_item: selected_item in cell # noqa: E731

elif self.splitting_var == "funder":
elif splitting_var == "funder":
splitting_var_filter = self.filter_funder
splitting_var_column = "funder"
splitting_var_query = lambda cell, selected_item: selected_item in cell
splitting_var_query = lambda cell, selected_item: selected_item in cell # noqa: E731

elif self.splitting_var == "data_tags":
elif splitting_var == "data_tags":
splitting_var_filter = self.filter_tags
splitting_var_column = "data_tags"
splitting_var_query = lambda cell, selected_item: selected_item in cell
splitting_var_query = lambda cell, selected_item: selected_item in cell # noqa: E731

else:
print("Defaulting to splitting var 'journal' ")
splitting_var_filter = self.filter_journal
splitting_var_column = "journal"
splitting_var_query = lambda cell, selected_item: cell == selected_item
splitting_var_query = lambda cell, selected_item: cell == selected_item # noqa: E731

last_year_values = {}
for selected_item in sorted(splitting_var_filter):
Expand All @@ -497,9 +525,10 @@ def updated_echart_plot(self):
]

if len(sub_df) > 0:
aggregation = "mean" if "percent" in raw_metric else "sum"
sub_df = (
sub_df.groupby("year")
.agg({raw_metric: "mean"}) # todo fix this
.agg({raw_metric: aggregation})
.reset_index()
)

Expand All @@ -514,7 +543,7 @@ def updated_echart_plot(self):
"name": selected_item,
"type": "line",
"data": sub_df[raw_metric].tolist(),
# Shows a label at the end of the line.
# Shows a label at the end of the plotted line.
# Labels end up overlapping in some cases.
# To fix this, we would need to change the offset of the label
# with values calculated to avoid overlapping.
Expand Down Expand Up @@ -559,6 +588,7 @@ def updated_echart_plot(self):
"tooltip": {
"show": True,
"trigger": "axis",
"order": "valueDesc",
# "formatter": f"""<b>{self.splitting_var}</b> : {{b0}} <br />
# {{a0}} : {{c0}} <br />
# {{a1}} : {{c1}} """,
Expand Down Expand Up @@ -591,7 +621,9 @@ def updated_echart_plot(self):
"fontFamily": "Roboto",
"fontSize": "20",
},
"axisLabel": {"formatter": "{value}%"},
"axisLabel": {
"formatter": "{value}%" if "percent" in raw_metric else "{value}"
},
},
"series": series,
}
Expand Down

0 comments on commit 866fb7d

Please sign in to comment.