Skip to content

Commit

Permalink
WIP : dashboard
Browse files Browse the repository at this point in the history
  • Loading branch information
pierrotsmnrd committed Aug 22, 2024
1 parent cf218b7 commit 23deaf5
Show file tree
Hide file tree
Showing 3 changed files with 375 additions and 0 deletions.
117 changes: 117 additions & 0 deletions web_api/dashboard/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import os

import pandas as pd
import panel as pn
from main_dashboard import MainDashboard
from odmantic import SyncEngine
from pymongo import MongoClient
from ui import get_template

from osm import schemas


def flatten_dict(d):
    """
    Collapse a nested dictionary into a single-level dictionary.

    Parent keys are dropped, not prefixed: only the innermost keys survive.
    When the same key appears more than once, the value seen last wins.

    :param d: Dictionary to flatten.
    :return: Flattened dictionary.
    """
    flat = {}
    for key, value in d.items():
        if isinstance(value, dict):
            # Merge the nested mapping's leaves directly into this level.
            flat.update(flatten_dict(value))
        else:
            flat[key] = value
    return flat


def load_data():
    """
    Load the raw dashboard data as a pandas DataFrame.

    If the ``LOCAL_DATA_PATH`` environment variable is set, the data is read
    from that Feather file. Otherwise the ``Invocation`` collection of the
    ``osm`` database reachable through ``MONGODB_URI`` is aggregated, and every
    returned document is flattened into one row.
    """
    local_path = os.environ.get("LOCAL_DATA_PATH")
    if local_path is not None:
        return pd.read_feather(local_path)

    mongo_client = MongoClient(os.environ["MONGODB_URI"])
    odm_engine = SyncEngine(client=mongo_client, database="osm")

    # Keep only documents of the expected schema version published after 2000.
    match_stage = {
        "$match": {
            "osm_version": {"$eq": "0.0.1"},
            # "work.pmid": {"$regex":r"^2"},
            "metrics.year": {"$gt": 2000},
            # "metrics.is_data_pred": {"$eq": True},
        },
    }
    # Restrict the payload to the fields the dashboard reads.
    project_stage = {
        "$project": {
            # "osm_version": True,
            # "user_comment": True,
            # "client.compute_context_id": True,
            "work.user_defined_id": True,
            "metrics.year": True,
            "metrics.is_code_pred": True,
            "metrics.is_data_pred": True,
            "metrics.affiliation_country": True,
            "metrics.score": True,
            "metrics.eigenfactor_score": True,
            "metrics.fund_pmc_anysource": True,
            "metrics.fund_pmc_institute": True,
            "metrics.fund_pmc_source": True,
            "metrics.journal": True,
        },
    }

    invocations = odm_engine.get_collection(schemas.Invocation)
    cursor = invocations.aggregate([match_stage, project_stage])
    return pd.DataFrame(flatten_dict(document) for document in cursor)


def dashboard_page():
    """
    Build the page served at "/".

    Creates a fresh template, instantiates the main dashboard on the data
    stored in ``pn.state.cache["data"]``, and attaches the dashboard's main
    view and sidebar to the template.
    """
    page = get_template()
    main_dashboard = MainDashboard(pn.state.cache["data"])

    # The methods themselves (not their return values) are appended.
    page.main.append(main_dashboard.get_dashboard)
    page.sidebar.append(main_dashboard.get_sidebar)

    return page


def on_load():
    """
    Run the expensive, one-time initialisation.

    Enables browser info and notifications in the Panel config, loads the raw
    data, and stores it in ``pn.state.cache["data"]`` for the pages to use.
    """
    pn.config.browser_info = True
    pn.config.notifications = True

    loaded = load_data()
    # Blank out every cell equal to 999999 (presumably a "missing" sentinel).
    cleaned = loaded.where(loaded != 999999)

    # Hardcoded for now, will be added to the raw data later
    cleaned["metrics"] = "RTransparent"

    pn.state.cache["data"] = cleaned


if __name__ == "__main__":
    # Register the one-time initialisation before the server is started.
    pn.state.onload(on_load)
    print("starting dashboard!")

    routes = {"/": dashboard_page}
    allowed_origins = [
        "localhost:8501",
        "osm.pythonaisolutions.com",
    ]
    keep_alive_ms = 30 * 1000  # 30s

    pn.serve(
        routes,
        address="0.0.0.0",
        port=8501,
        start=True,
        location=True,
        show=False,
        keep_alive=keep_alive_ms,
        autoreload=True,
        admin=True,
        profiler="pyinstrument",
        allow_websocket_origin=allowed_origins,
    )
187 changes: 187 additions & 0 deletions web_api/dashboard/main_dashboard.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
import holoviews as hv
import pandas as pd
import panel as pn
import param

# Initialise the Panel extension for this module.
pn.extension()

# Show every column when a DataFrame is printed (no truncation).
pd.set_option("display.max_columns", None)

# Candidate filter definitions, kept for reference (not active):
# filters = {
# "journal" : "category",
# "metrics" : "select",
# }


# Grouping dimensions (name -> type tag).
groups = dict(year="int")


# For each metric column, the aggregations computed per group. Each
# (aggregation, column) pair yields one output column named
# "<aggregation>_<column>".
dims_aggregations = dict(
    is_data_pred=["percent", "count_true", "count"],
    is_code_pred=["percent", "count_true"],
    score=["mean"],
    eigenfactor_score=["mean"],
)


# Aggregation name -> callable or pandas aggregation name.
aggregation_formulas = dict(
    # Share of truthy values, expressed in percent.
    percent=lambda column: 100 * column.mean(),
    # Number of values equal to True.
    count_true=lambda column: column.eq(True).sum(),
    count="count",
    mean="mean",
)


class MainDashboard(param.Parameterized):
    """
    Main dashboard for the application.

    Holds the raw data, exposes the grouping and filtering parameters shown in
    the sidebar, and renders the data/code transparency chart from the
    filtered and grouped data.
    """

    select_metrics = param.Selector(
        default="RTransparent", objects=["RTransparent"], label=""
    )

    grouping_var = param.Selector(
        default="year", objects=["year", "fund_pmc_institute"], label=""
    )

    filter_journal = param.Selector(
        default="All journals (including empty)",
        objects=[
            "All journals (including empty)",
            "All journals (excluding empty values)",
            "Only selected journals",
        ],
        label="Journal",
    )
    filter_selected_journals = param.ListSelector(default=[], objects=[], label="")

    def __init__(self, raw_data, **params):
        """
        :param raw_data: DataFrame with one row per publication; must contain
            a ``journal`` column plus the grouping and metric columns.
        :param params: Initial values for the parameters declared above.
        """
        super().__init__(**params)

        self.raw_data = raw_data

        journals = self.raw_data["journal"]
        # Offer a plain list of real journal names: missing values are not a
        # selectable journal (the "including empty" mode already covers them).
        self.param.filter_selected_journals.objects = list(journals.dropna().unique())
        # As default, takes the journals with the biggest number of occurrences
        self.filter_selected_journals = list(journals.value_counts().iloc[:10].index)

    def filtered_grouped_data(self):
        """
        Apply the journal filter, then aggregate the metrics per group.

        :return: DataFrame with one row per value of ``grouping_var`` and one
            column per entry of ``dims_aggregations``, named
            ``"<aggregation>_<field>"``.
        """
        filtered_df = self.raw_data

        # Boolean masks instead of a string-built ``query`` expression: no
        # interpolation of journal names into code, and further filters can
        # be chained here without being mistaken for ``query`` arguments.
        if self.filter_journal == "All journals (excluding empty values)":
            filtered_df = filtered_df[filtered_df["journal"].notna()]
        elif self.filter_journal == "Only selected journals":
            filtered_df = filtered_df[
                filtered_df["journal"].isin(self.filter_selected_journals)
            ]

        # Named aggregations: output column -> (source column, formula).
        aggregations = {
            f"{agg}_{field}": (field, aggregation_formulas[agg])
            for field, aggs in dims_aggregations.items()
            for agg in aggs
        }

        return filtered_df.groupby(self.grouping_var).agg(**aggregations).reset_index()

    @pn.depends("select_metrics", "filter_journal")
    def get_sidebar(self):
        """
        Build the sidebar widgets.

        The journal multi-choice is only shown when the journal filter is set
        to "Only selected journals".
        """
        items = [
            pn.pane.Markdown("## Filters"),
            pn.pane.Markdown("### Metrics extraction tool"),
            pn.widgets.Select.from_param(self.param.select_metrics),
            pn.layout.Divider(),
            pn.pane.Markdown("### Grouping"),
            pn.widgets.Select.from_param(self.param.grouping_var),
            pn.layout.Divider(),
            pn.pane.Markdown("### Filters"),
            pn.widgets.Select.from_param(self.param.filter_journal),
        ]

        if self.filter_journal == "Only selected journals":
            items.append(
                pn.widgets.MultiChoice.from_param(
                    self.param.filter_selected_journals, max_items=10
                )
            )

        return pn.Column(*items)

    @pn.depends(
        "select_metrics", "filter_journal", "filter_selected_journals", "grouping_var"
    )
    def get_dashboard(self):
        """
        Build the main view: percentage of publications predicted to share
        data (red) and code (light blue), per value of ``grouping_var``.
        """
        df = self.filtered_grouped_data()

        # Create charts
        fig_data_curve = hv.Curve(
            df,
            kdims=[self.grouping_var],
            vdims=[
                "percent_is_data_pred",
            ],
        ).opts(color="red")

        fig_code_curve = hv.Curve(
            df,
            kdims=[self.grouping_var],
            vdims=[
                "percent_is_code_pred",
            ],
        ).opts(color="lightblue")

        fig_data_points = hv.Points(
            df,
            kdims=[self.grouping_var, "percent_is_data_pred"],
        ).opts(
            tools=["hover"],
            color="red",
            size=5,
            hover_tooltips=[
                (self.grouping_var, f"@{self.grouping_var}"),
                ("% is_data_prep", "@percent_is_data_pred"),
                ("Total is_data_prep", "@count_true_is_data_pred"),
                ("nbr_publications", "@count_is_data_pred"),
            ],
        )

        fig_code_points = hv.Points(
            df,
            kdims=[self.grouping_var, "percent_is_code_pred"],
        ).opts(
            tools=["hover"],
            color="lightblue",
            size=5,
            hover_tooltips=[
                (self.grouping_var, f"@{self.grouping_var}"),
                ("% is_code_prep", "@percent_is_code_pred"),
                ("Total is_code_prep", "@count_true_is_code_pred"),
                # A plain "count" is only aggregated for is_data_pred, so that
                # column is reused here as the publication count.
                ("nbr_publications", "@count_is_data_pred"),
            ],
        )

        plot = (
            fig_code_curve * fig_data_curve * fig_data_points * fig_code_points
        ).opts(
            title="",
            xlabel=self.grouping_var,
            ylabel="Percentage",
            width=800,
            height=400,
            legend_position="top_left",
        )

        # Layout the dashboard
        dashboard = pn.Column(
            "# Data and code transparency",
            pn.Column(plot, sizing_mode="stretch_width"),
        )

        return dashboard
71 changes: 71 additions & 0 deletions web_api/dashboard/ui.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import panel as pn


def connection_monitor():
    """
    Return an HTML pane carrying a script that watches the page's WebSocket.

    The script records every WebSocket that sends data and polls the first
    one every 5 seconds; once it is closing or closed, a red banner asking
    the user to reload is inserted at the top of the page.
    """
    return pn.pane.HTML(
        """
<script>
const originalSend = WebSocket.prototype.send;
window.sockets = [];
WebSocket.prototype.send = function(...args) {
if (window.sockets.indexOf(this) === -1)
window.sockets.push(this);
return originalSend.call(this, ...args);
};
console.log(window.sockets);
const polling = setInterval(function() {
if ( window.sockets.length > 0 ){
if ( window.sockets[0].readyState >= 2 ){
let div = document.createElement('div');
div.style.color = 'white';
div.style.backgroundColor= 'crimson';
div.style.padding = '10px 10px 10px 10px';
div.style.textAlign= 'center';
let text = document.createTextNode('Bokeh session has expired. Please reload.');
div.appendChild(text);
window.document.body.insertBefore(
div,
window.document.body.firstChild
);
clearInterval(polling);
}
}
}, 5000);
</script>
"""
    )


def get_template():
    """
    Build the Panel template used by the dashboard pages.

    Returns a ``FastListTemplate`` for the "NIH" site titled "OpenSciMetrics",
    with an empty sidebar and the connection monitor added to its header.
    """
    page = pn.template.FastListTemplate(
        title="OpenSciMetrics",
        site="NIH",
        favicon="https://www.nih.gov/favicon.ico",
        sidebar=[],
    )
    page.header.append(connection_monitor())
    return page

0 comments on commit 23deaf5

Please sign in to comment.