Skip to content

Commit

Permalink
Fix for "None" values in the variables
Browse files Browse the repository at this point in the history
  • Loading branch information
pierrotsmnrd committed Aug 27, 2024
1 parent 378f401 commit 8c12246
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 18 deletions.
21 changes: 14 additions & 7 deletions web/dashboard/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,21 @@ def load_data():
raw_data = raw_data[raw_data.year >= 2000]

# necessary conversion to tuples, which are a hashable type
# needed for grouping
for col in ["funder", "data_tags"]:
raw_data[col] = raw_data[col].apply(lambda x: tuple(x))
# needed for grouping.
# Also removes duplicates and removes leading and trailing spaces in values.
# Also replaces empty lists with ("None", ) to simplify the filtering and grouping in the dashboard
for col in ["funder", "affiliation_country", "data_tags"]:
raw_data[col] = raw_data[col].apply(
lambda x: ("None",)
if (len(x) == 0 or len(x) == 1 and x[0] == "")
else tuple(set([v.strip() for v in x]))
)

# convert to tuple, remove duplicates and remove leading and trailing spaces in countries names
raw_data["affiliation_country"] = raw_data["affiliation_country"].apply(
lambda x: tuple(set([v.strip() for v in x]))
)
# Filter out some distracting weird data
raw_data = raw_data[
raw_data["journal"]
!= "Acta Crystallographica Section E: Structure Reports Online"
]

return raw_data

Expand Down
21 changes: 10 additions & 11 deletions web/dashboard/main_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def did_change_extraction_tool(self):

## affiliation country
countries_with_count = self.get_col_values_with_count(
"affiliation_country", lambda x: len(x) == 0 or len(x) == 1 and x[0] == ""
"affiliation_country", lambda x: "None" in x
)

def country_sorter(c):
Expand All @@ -236,7 +236,7 @@ def country_sorter(c):

## funder
funders_with_count = self.get_col_values_with_count(
"funder", lambda x: len(x) == 0 or len(x) == 1 and x[0] == ""
"funder", lambda x: "None" in x
)

def funder_sorter(c):
Expand All @@ -248,7 +248,7 @@ def funder_sorter(c):

## Tags
tags_with_count = self.get_col_values_with_count(
"data_tags", lambda x: len(x) == 0 or len(x) == 1 and x[0] == ""
"data_tags", lambda x: "None" in x
)

def tags_sorter(c):
Expand Down Expand Up @@ -300,7 +300,7 @@ def did_change_splitting_var(self):
# We want to show all countries, but pre-select only the top 10
countries_with_count = self.get_col_values_with_count(
"affiliation_country",
lambda x: len(x) == 0 or len(x) == 1 and x[0] == "",
lambda x: "None" in x,
)

# pre-filter the countries because there are a lot
Expand Down Expand Up @@ -331,7 +331,7 @@ def did_change_splitting_var(self):
if splitting_var == "funder":
# We want to show all funders, but pre-select only the top 10
funders_with_count = self.get_col_values_with_count(
"funder", lambda x: len(x) == 0 or len(x) == 1 and x[0] == ""
"funder", lambda x: "None" in x
)

top_5_min = sorted(
Expand Down Expand Up @@ -400,7 +400,7 @@ def filtered_grouped_data(self):
# the filter on countries is a bit different as the rows
# are lists of countries
def country_filter(cell):
if cell is None:
if len(cell) == 0 or len(cell) == 1 and cell[0] == "":
return "None" in self.filter_affiliation_country
return any(c in self.filter_affiliation_country for c in cell)

Expand All @@ -424,22 +424,22 @@ def funder_filter(cell):
):
# the filter on tags is similar to the filter on countries
def tags_filter(cell):
if cell is None:
if len(cell) == 0 or len(cell) == 1 and cell[0] == "":
return "None" in self.filter_tags
return any(c in self.filter_tags for c in cell)

filtered_df = filtered_df[filtered_df.data_tags.apply(tags_filter)]

aggretations = {}
aggregations = {}
for field, aggs in dims_aggregations.items():
for agg in aggs:
aggretations[f"{agg}_{field}"] = (field, aggregation_formulas[agg])
aggregations[f"{agg}_{field}"] = (field, aggregation_formulas[agg])

groupers = ["year"]
if self.splitting_var != "None":
groupers.append(self.splitting_var_from_label(self.splitting_var))

result = filtered_df.groupby(groupers).agg(**aggretations).reset_index()
result = filtered_df.groupby(groupers).agg(**aggregations).reset_index()

print("FILTERED_GROUPED_DATA_DONE", len(result))

Expand Down Expand Up @@ -705,7 +705,6 @@ def update_pubdate_slider(event):
)

def did_click_shortcut_button(event):
print(event)
if event.obj.name == "Last year":
self.start_pubdate_input.value, self.end_pubdate_input.value = (
str(datetime.now().year),
Expand Down

0 comments on commit 8c12246

Please sign in to comment.