Skip to content

Commit

Permalink
refactor/fix: splitting out project view functionality for future exp…
Browse files Browse the repository at this point in the history
…ansion
  • Loading branch information
dbirman committed Dec 10, 2024
1 parent 843cf31 commit 56ad53a
Show file tree
Hide file tree
Showing 7 changed files with 271 additions and 138 deletions.
37 changes: 35 additions & 2 deletions src/aind_qc_portal/docdb/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ def get_all():


@pn.cache(ttl=TIMEOUT_1H)
def get_project(project: str):
filter = {"data_description.project_name": project}
def get_project(project_name: str):
filter = {"data_description.project_name": project_name}
limit = 0
paginate_batch_size = 500
response = client.retrieve_docdb_records(
Expand All @@ -145,6 +145,9 @@ def get_project(project: str):
"session.session_start_time": 1,
"data_description.data_level": 1,
"data_description.project_name": 1,
"rig.rig_id": 1,
"session.experimenter_full_name": 1,
"quality_control": 1,
},
limit=limit,
paginate_batch_size=paginate_batch_size,
Expand All @@ -153,6 +156,36 @@ def get_project(project: str):
return response


@pn.cache(ttl=TIMEOUT_1H)
def get_project_custom(project_name: str, fields: list):
    """Get all records that match a project name, with custom fields

    Parameters
    ----------
    project_name : str
        Value matched against ``data_description.project_name``
    fields : list
        List of fields to retain from DocDB record. ``_id`` is always
        included; empty field names are ignored.

    Returns
    -------
    list
        List of dictionaries containing the fields requested
    """
    # Named filter_query (not `filter`) so the builtin is not shadowed
    filter_query = {"data_description.project_name": project_name}
    limit = 0
    paginate_batch_size = 500

    # An empty name does not identify any field, so it is dropped here instead
    # of being forwarded to DocDB as a projection key
    projection = {"_id": 1}
    projection.update({field: 1 for field in fields if field})

    response = client.retrieve_docdb_records(
        filter_query=filter_query,
        projection=projection,
        limit=limit,
        paginate_batch_size=paginate_batch_size,
    )

    return response


@pn.cache
def get_subjects():
filter = {
Expand Down
2 changes: 1 addition & 1 deletion src/aind_qc_portal/panel/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def metric_panel(self):
options=["Pass", "Fail", "Pending"],
name="Metric status",
)

if pn.state.user == "guest":
self.state_selector.disabled = True
else:
Expand Down
9 changes: 7 additions & 2 deletions src/aind_qc_portal/panel/quality_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,15 +213,20 @@ def status_panel(self):

def panel(self):
"""Build a Panel object representing this QC action"""
if not self._has_data or not self._data:
return pn.widgets.StaticText(value="No QC object available")

# build the header
md = f"""
<span style="font-size:14pt">Quality control for {self.asset_name}</span>
"""
header = pn.pane.Markdown(md)

if not self._has_data or not self._data:
return pn.Row(
pn.HSpacer(),
pn.Column(header,
pn.widgets.StaticText(value="No QC object available", styles={"font-size": "22pt"}), styles=OUTER_STYLE),
pn.HSpacer())

# build the display box: this shows the current state in DocDB of this asset
# if any evaluations are failing, we'll show a warning
failing_eval_str = ""
Expand Down
114 changes: 114 additions & 0 deletions src/aind_qc_portal/projects/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import pandas as pd
import param

from aind_qc_portal.docdb.database import get_project, get_project_custom
from aind_qc_portal.utils import format_link, qc_color
from aind_data_schema.core.quality_control import QualityControl, Status

class ProjectDataset(param.Parameterized):
    """Generic dataset class, loads default data for all projects

    The records returned by ``get_project`` are flattened into one row per
    record and stored in ``self._df``. ``self._df`` is ``None`` when the
    project has no records, in which case every accessor below returns ``None``.
    """

    # Pattern matched case-insensitively against subject_id; "" disables filtering
    subject_filter = param.String(default="")

    def __init__(self, project_name: str):
        """Create a ProjectDataset object and load its assets

        Parameters
        ----------
        project_name : str
            Project name passed to ``get_project``
        """
        # Parameterized must be initialised before instance state is touched
        super().__init__()

        self.project_name = project_name
        self._df = None
        self.exposed_columns = [
            "subject_id",
            "Date",
            "name",
            "Operator",
            "S3 link",
            "Status",
            "Subject view",
            "QC view",
            "session_type",
            "raw",
        ]
        self._get_assets()

    @staticmethod
    def _record_to_row(record: dict) -> dict:
        """Flatten one DocDB record into the row stored in the dataframe"""
        # `or {}` also covers sub-documents that are present but set to None
        subject = record.get("subject") or {}
        session = record.get("session") or {}
        data_description = record.get("data_description") or {}

        # reconstruct the QC object, if possible
        qc_data = record.get("quality_control")
        qc = QualityControl(**qc_data) if qc_data else None

        return {
            "_id": record.get("_id"),
            "raw": data_description.get("data_level") == "raw",
            "project_name": data_description.get("project_name"),
            "location": record.get("location"),
            "name": record.get("name"),
            "session_start_time": session.get("session_start_time"),
            "session_type": session.get("session_type"),
            "subject_id": subject.get("subject_id"),
            # A record without experimenters yields an empty list instead of list(None)
            "operator": list(session.get("experimenter_full_name") or []),
            "Status": qc.status().value if qc else "No QC",
        }

    def _get_assets(self):
        """Get all assets with this project name and build ``self._df``"""
        records = get_project(self.project_name)
        rows = [self._record_to_row(record) for record in records]

        if not rows:
            self._df = None
            return

        df = pd.DataFrame(rows)
        df["timestamp"] = pd.to_datetime(df["session_start_time"], format="mixed", utc=True)
        df["Date"] = df["timestamp"].dt.strftime("%Y-%m-%d %H:%M:%S")
        df["S3 link"] = df["location"].apply(lambda x: format_link(x, text="S3 link"))
        df["Subject view"] = df["_id"].apply(lambda x: format_link(f"/qc_asset_app?id={x}"))
        df["qc_link"] = df["_id"].apply(lambda x: f"/qc_app?id={x}")
        df["QC view"] = df["qc_link"].apply(format_link)
        df["Operator"] = df["operator"].apply(lambda names: ", ".join(names))
        # One multi-key sort: two consecutive single-key sorts do not guarantee
        # that the timestamp order survives within a subject
        df.sort_values(by=["subject_id", "timestamp"], ascending=True, inplace=True)

        self._df = df

    def _filtered_rows(self):
        """Return the rows of ``self._df`` matching ``subject_filter`` (all columns), or None"""
        if self._df is None:
            return None
        if not self.subject_filter:
            return self._df

        matches = self._df["subject_id"].str.contains(self.subject_filter, case=False, na=False)
        return self._df[matches]

    def filtered_data(self):
        """Return the exposed columns of the filtered rows, or None without data"""
        rows = self._filtered_rows()
        if rows is None:
            return None

        return rows[self.exposed_columns]

    @property
    def data(self):
        """Filtered data as a Styler with ``qc_color`` applied to "Status", or None without data"""
        frame = self.filtered_data()
        if frame is None:
            return None

        return frame.style.map(qc_color, subset=["Status"])

    @property
    def timestamp_data(self):
        """Single-column ("timestamp") frame of the filtered rows, or None without data"""
        rows = self._filtered_rows()
        if rows is None:
            return None

        return rows[["timestamp"]]


class LearningmFishDataset(ProjectDataset):
    """Dataset specialised for the "Learning mFISH-V1omFISH" project"""

    def __init__(self, project_name: str):
        """Load the default project data, then the project-specific extras

        Raises
        ------
        ValueError
            If ``project_name`` is not "Learning mFISH-V1omFISH"
        """
        is_supported = project_name == "Learning mFISH-V1omFISH"
        if not is_supported:
            raise ValueError("This class is only for Learning mFISH-V1omFISH")

        super().__init__(project_name=project_name)

        self._get_mfish_assets()

    def _get_mfish_assets(self):
        """Fetch the extra records needed by the Learning mFISH-V1omFISH project

        Anything loaded here is meant to be appended to ``self._df`` and the new
        column names added to ``self.exposed_columns``.
        """
        # NOTE(review): the only requested field is an empty string and the result
        # is not used yet -- this looks like a placeholder; confirm the intended
        # field list before relying on it.
        requested_fields = [""]
        get_project_custom(self.project_name, requested_fields)


# Registry of project-specific dataset classes, keyed by project name.
# ProjectView looks names up here and falls back to ProjectDataset when absent.
mapping = {
"Learning mFISH-V1omFISH": LearningmFishDataset,
}
103 changes: 103 additions & 0 deletions src/aind_qc_portal/projects/project_view.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
from typing import Optional
import pandas as pd
import panel as pn
import altair as alt

from aind_qc_portal.projects.dataset import mapping, ProjectDataset
from aind_qc_portal.utils import df_timestamp_range, OUTER_STYLE
from aind_qc_portal.utils import OUTER_STYLE, AIND_COLORS


class ProjectView():
    """Panel view listing and charting the data assets of one project"""

    def __init__(self, project_name: str):
        """Build the dataset for ``project_name``

        A project-specific dataset class is used when ``mapping`` has one,
        otherwise the generic ``ProjectDataset``.
        """
        dataset_cls = mapping.get(project_name, ProjectDataset)
        self.dataset = dataset_cls(project_name=project_name)
        self.project_name = project_name

    @property
    def has_data(self):
        """True when the dataset loaded at least one record"""
        # Check the raw frame: going through `dataset.data` would build a full
        # Styler on every call and fails when there is no frame to style
        return self.dataset._df is not None

    def get_subjects(self):
        """Return the unique subject ids as a list (empty without data)"""
        if not self.has_data:
            return []

        return self.dataset._df["subject_id"].unique().tolist()

    def get_data(self) -> Optional[pd.DataFrame]:
        """Return the filtered, unstyled data, or None without data"""
        if not self.has_data:
            return None

        return self.dataset.filtered_data()

    def get_data_styled(self):
        """Return the filtered data with status styling, or None without data"""
        if not self.has_data:
            return None

        return self.dataset.data

    def history_panel(self):
        """Create a bar chart of the filtered assets: date on x, subject on y

        Each mark links to the asset's QC page through the ``qc_link`` column.
        """
        if not self.has_data:
            return pn.widgets.StaticText(
                value=f"No data found for project: {self.project_name}"
            )

        # Calculate the time range to show on the x axis
        (min_range, max_range, range_unit, time_format) = df_timestamp_range(
            self.dataset.timestamp_data
        )

        # The exposed table columns do not include `qc_link`, so pull the chart
        # columns for the visible rows straight from the underlying frame
        visible_index = self.get_data().index
        chart_df = self.dataset._df.loc[
            visible_index, ["subject_id", "session_type", "Date", "qc_link"]
        ]

        chart = (
            alt.Chart(chart_df)
            .mark_bar()
            .encode(
                x=alt.X(
                    "Date:T",
                    title="Time",
                    scale=alt.Scale(domain=[min_range, max_range]),
                    axis=alt.Axis(format=time_format, tickCount=range_unit),
                ),
                y=alt.Y("subject_id:N", title="Subject ID"),
                tooltip=[
                    "subject_id",
                    "session_type",
                    "Date",
                ],
                color=alt.Color("subject_id:N"),
                href="qc_link:N",
            )
            .properties(width=900)
        )

        return pn.pane.Vega(chart, sizing_mode="stretch_width", styles=OUTER_STYLE)

    def panel(self) -> "pn.Column":
        """Return panel object"""
        visible = self.get_data()
        n_assets = 0 if visible is None else len(visible)

        md = (
            f'\n<h1 style="color:{AIND_COLORS["dark_blue"]};">\n'
            f"{self.project_name}\n"
            "</h1>\n"
            f"<b>{n_assets}</b> data assets are associated with this project.\n"
        )

        header = pn.pane.Markdown(md, width=1000, styles=OUTER_STYLE)

        chart_pane = self.history_panel()

        df_pane = pn.pane.DataFrame(self.get_data_styled(), width=950, escape=False, index=False)

        def update_subject_filter(event):
            self.dataset.subject_filter = event.new
            # Keep the status colouring after a filter change
            df_pane.object = self.get_data_styled()

        # The first entry maps to the empty filter so the widget's initial
        # selection matches the unfiltered table and the filter can be cleared
        options = {"All subjects": ""}
        options.update(
            {str(subject): subject for subject in self.get_subjects() if pd.notna(subject) and subject != ""}
        )
        current = self.dataset.subject_filter
        subject_filter = pn.widgets.Select(
            name="Subject filter",
            options=options,
            value=current if current in options.values() else "",
        )
        subject_filter.param.watch(update_subject_filter, "value")

        df_col = pn.Column(subject_filter, df_pane, styles=OUTER_STYLE)

        col = pn.Column(header, chart_pane, df_col)

        return col
Loading

0 comments on commit 56ad53a

Please sign in to comment.