Skip to content

Commit

Permalink
Merge branch 'ticket/PSB-7/dev' into rc/2.16.0
Browse files Browse the repository at this point in the history
  • Loading branch information
morriscb committed Nov 3, 2023
2 parents f8ea31b + 77737ec commit 5a66e26
Show file tree
Hide file tree
Showing 7 changed files with 284 additions and 136 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,18 @@
from allensdk.brain_observatory.behavior.behavior_session import (
BehaviorSession,
)
from allensdk.brain_observatory.ophys.project_constants import (
VBO_INTEGER_COLUMNS,
VBO_METADATA_COLUMN_ORDER,
)
from allensdk.core.dataframe_utils import (
enforce_df_column_order,
enforce_df_int_typing,
return_one_dataframe_row_only,
)
from allensdk.core.utilities import literal_col_eval

# Column names grouped for evaluation handling.
# NOTE(review): presumably these are the columns passed through
# ``literal_col_eval`` (imported above) -- confirm against the call site,
# which is not visible in this view.
COL_EVAL_LIST = [
    "ophys_experiment_id",
    "ophys_container_id",
    "driver_line",
]
# Names of metadata-table columns that are passed to
# ``enforce_df_int_typing`` so they are coerced to an integer dtype.
# NOTE(review): the table builders visible here also call
# ``enforce_df_int_typing`` with ``VBO_INTEGER_COLUMNS`` from
# ``project_constants``; confirm whether this module-local list is still used.
INTEGER_COLUMNS = [
    "session_number",
    "prior_exposures_to_image_set",
    "ophys_session_id",
    "imaging_plane_group_count",
    "imaging_plane_group",
    "targeted_areas",
    "num_depths_per_area",
    "num_targeted_structures",
]


def sanitize_data_columns(
Expand Down Expand Up @@ -194,7 +189,12 @@ def _get_ophys_session_table(self):
df["date_of_acquisition"] = pd.to_datetime(
df["date_of_acquisition"], utc="True"
)
df = enforce_df_int_typing(df, INTEGER_COLUMNS, True)
df = enforce_df_int_typing(
input_df=df, int_columns=VBO_INTEGER_COLUMNS, use_pandas_type=True
)
df = enforce_df_column_order(
input_df=df, column_order=VBO_METADATA_COLUMN_ORDER
)
self._ophys_session_table = df.set_index("ophys_session_id")

def get_ophys_session_table(self) -> pd.DataFrame:
Expand All @@ -219,7 +219,12 @@ def _get_behavior_session_table(self):
df["date_of_acquisition"] = pd.to_datetime(
df["date_of_acquisition"], utc="True"
)
df = enforce_df_int_typing(df, INTEGER_COLUMNS, True)
df = enforce_df_int_typing(
input_df=df, int_columns=VBO_INTEGER_COLUMNS, use_pandas_type=True
)
df = enforce_df_column_order(
input_df=df, column_order=VBO_METADATA_COLUMN_ORDER
)

self._behavior_session_table = df.set_index("behavior_session_id")

Expand Down Expand Up @@ -249,7 +254,12 @@ def _get_ophys_experiment_table(self):
df["date_of_acquisition"] = pd.to_datetime(
df["date_of_acquisition"], utc="True"
)
df = enforce_df_int_typing(df, INTEGER_COLUMNS, True)
df = enforce_df_int_typing(
input_df=df, int_columns=VBO_INTEGER_COLUMNS, use_pandas_type=True
)
df = enforce_df_column_order(
input_df=df, column_order=VBO_METADATA_COLUMN_ORDER
)
self._ophys_experiment_table = df.set_index("ophys_experiment_id")

def _get_ophys_cells_table(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,19 @@
from allensdk.brain_observatory.ecephys.ecephys_project_api.http_engine import ( # noqa: E501
HttpEngine,
)
from allensdk.brain_observatory.ophys.project_constants import (
VBO_INTEGER_COLUMNS,
VBO_METADATA_COLUMN_ORDER,
)
from allensdk.core.auth_config import (
LIMS_DB_CREDENTIAL_MAP,
MTRAIN_DB_CREDENTIAL_MAP,
)
from allensdk.core.authentication import DbCredentials
from allensdk.core.dataframe_utils import (
enforce_df_column_order,
enforce_df_int_typing,
)
from allensdk.internal.api import db_connection_creator
from allensdk.internal.api.queries.utils import (
build_in_list_selector_query,
Expand Down Expand Up @@ -384,7 +392,14 @@ def _get_ophys_experiment_table(self) -> pd.DataFrame:
.astype(int)
)
targeted_imaging_depth.columns = ["targeted_imaging_depth"]
return query_df.merge(targeted_imaging_depth, on="ophys_container_id")
df = query_df.merge(targeted_imaging_depth, on="ophys_container_id")
df = enforce_df_int_typing(
input_df=df, int_columns=VBO_INTEGER_COLUMNS, use_pandas_type=True
)
df = enforce_df_column_order(
input_df=df, column_order=VBO_METADATA_COLUMN_ORDER
)
return df

def _get_ophys_cells_table(self):
"""
Expand Down Expand Up @@ -427,8 +442,8 @@ def _get_ophys_cells_table(self):
df = self.lims_engine.select(query)

# NaN's for invalid cells force this to float, push to int
df["cell_specimen_id"] = pd.array(
df["cell_specimen_id"], dtype="Int64"
df = enforce_df_int_typing(
input_df=df, int_columns=VBO_INTEGER_COLUMNS, use_pandas_type=True
)
return df

Expand Down Expand Up @@ -491,17 +506,25 @@ def get_ophys_session_table(self) -> pd.DataFrame:
"""
# There is one ophys_session_id from 2018 that has multiple behavior
# ids, causing duplicates -- drop all dupes for now; # TODO
table = (
self._get_ophys_session_table()
.drop_duplicates(subset=["ophys_session_id"], keep=False)
.set_index("ophys_session_id")
table = self._get_ophys_session_table().drop_duplicates(
subset=["ophys_session_id"], keep=False
)
# Make date time explicitly UTC.
table["date_of_acquisition"] = pd.to_datetime(
table["date_of_acquisition"], utc=True
)

# Fill NaN values of imaging_plane_group_count with zero to match
# the behavior of the BehaviorOphysExperiment object.
im_plane_count = table["imaging_plane_group_count"].astype("Int64")
table["imaging_plane_group_count"] = im_plane_count
return table
table = enforce_df_int_typing(
input_df=table,
int_columns=VBO_INTEGER_COLUMNS,
use_pandas_type=True,
)
table = enforce_df_column_order(
input_df=table, column_order=VBO_METADATA_COLUMN_ORDER
)
return table.set_index("ophys_session_id")

def get_behavior_session(
self, behavior_session_id: int
Expand Down Expand Up @@ -530,9 +553,18 @@ def get_ophys_experiment_table(self) -> pd.DataFrame:
:rtype: pd.DataFrame
"""
df = self._get_ophys_experiment_table()
df["date_of_acquisition"] = pd.to_datetime(
df["date_of_acquisition"], utc=True
)
# Set type to pandas.Int64 to enforce integer typing and not revert to
# float.
df["imaging_plane_group"] = df["imaging_plane_group"].astype("Int64")
df = enforce_df_int_typing(
input_df=df, int_columns=VBO_INTEGER_COLUMNS, use_pandas_type=True
)
df = enforce_df_column_order(
input_df=df, column_order=VBO_METADATA_COLUMN_ORDER
)

return df.set_index("ophys_experiment_id")

def get_behavior_session_table(self) -> pd.DataFrame:
Expand All @@ -547,13 +579,20 @@ def get_behavior_session_table(self) -> pd.DataFrame:
acquisition date for behavior sessions (only in the stimulus pkl file)
"""
summary_tbl = self._get_behavior_summary_table()
# Query returns float typing of age_in_days. Convert to int to match
# typing of the Age data_object.
summary_tbl["age_in_days"] = summary_tbl["age_in_days"].astype("Int64")
# Add UTC time zone to match timezone from DateOfAcquisition object.
summary_tbl["date_of_acquisition"] = pd.to_datetime(
summary_tbl["date_of_acquisition"], utc=True
)
# Query returns float typing of age_in_days. Convert to int to match
# typing of the Age data_object.
summary_tbl = enforce_df_int_typing(
input_df=summary_tbl,
int_columns=VBO_INTEGER_COLUMNS,
use_pandas_type=True,
)
summary_tbl = enforce_df_column_order(
input_df=summary_tbl, column_order=VBO_METADATA_COLUMN_ORDER
)

return summary_tbl.set_index("behavior_session_id")

Expand Down
20 changes: 20 additions & 0 deletions allensdk/brain_observatory/ophys/project_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
"imaging_depth",
"targeted_structure",
"targeted_imaging_depth",
"imaging_plane_group_count",
"imaging_plane_group",
"project_code",
"session_type",
Expand All @@ -79,6 +80,25 @@
"prior_exposures_to_omissions",
"date_of_acquisition",
"equipment_name",
"num_depths_per_area",
"ophys_experiment_id",
"num_targeted_structures",
"published_at",
"isi_experiment_id",
]


# Metadata-table columns whose integer typing is enforced.  Callers pass this
# list as ``int_columns`` to ``enforce_df_int_typing`` (with
# ``use_pandas_type=True``) so that columns which may have lost their int
# typing while the table was assembled are converted back to integers.
# A column listed here need not exist in every table.
# NOTE(review): that last point assumes ``enforce_df_int_typing`` skips
# names missing from the frame -- confirm in ``dataframe_utils``.
VBO_INTEGER_COLUMNS = [
    # Session / subject bookkeeping.
    "session_number",
    "age_in_days",
    "prior_exposures_to_image_set",
    "prior_exposures_to_session_type",
    "prior_exposures_to_omissions",
    "ophys_session_id",
    # Imaging-plane and targeting information.
    "imaging_plane_group_count",
    "imaging_plane_group",
    "targeted_areas",
    "num_depths_per_area",
    "num_targeted_structures",
    # Cells table.
    "cell_specimen_id",
]
7 changes: 5 additions & 2 deletions allensdk/core/dataframe_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ def patch_df_from_other(


def enforce_df_column_order(
input_df: pd.DataFrame, column_order: List[str]
input_df: pd.DataFrame,
column_order: List[str]
) -> pd.DataFrame:
"""Return the data frame but with columns ordered.
Expand Down Expand Up @@ -128,7 +129,9 @@ def enforce_df_column_order(


def enforce_df_int_typing(
input_df: pd.DataFrame, int_columns: List[str], use_pandas_type=False
input_df: pd.DataFrame,
int_columns: List[str],
use_pandas_type: object = False
) -> pd.DataFrame:
"""Enforce integer typing for columns that may have lost int typing when
combined into the final DataFrame.
Expand Down
Loading

0 comments on commit 5a66e26

Please sign in to comment.