Commit
Merge branch 'ticket/PSB-167/addQualityToMetadataTable' into rc/2.16.0
morriscb committed Aug 21, 2023
2 parents e768fcc + 2c9dc0c commit a3aa0cf
Showing 23 changed files with 3,602 additions and 2,265 deletions.
2 changes: 2 additions & 0 deletions .flake8
@@ -0,0 +1,2 @@
[flake8]
extend-ignore = E203
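
Note: E203 flags whitespace before ":", which conflicts with how the Black formatter spaces complex slice bounds; the Black-style reformatting elsewhere in this commit suggests that is why the check is ignored. A minimal, hypothetical illustration (not code from this repository):

# Black formats complex slice bounds with a space on each side of ":",
# which flake8 reports as E203 unless that check is ignored.
counts = list(range(10))
start_index, stop_index = 2, 8
prior = counts[start_index + 1 : stop_index]  # flake8: E203 whitespace before ':'
print(prior)  # [3, 4, 5, 6, 7]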
@@ -6,18 +6,28 @@
from allensdk.brain_observatory.behavior.behavior_session import (
BehaviorSession,
)
+from allensdk.brain_observatory.ecephys._probe import ProbeWithLFPMeta
from allensdk.brain_observatory.ecephys.behavior_ecephys_session import (
BehaviorEcephysSession,
)
-from allensdk.brain_observatory.ecephys._probe import ProbeWithLFPMeta
from allensdk.core.dataframe_utils import (
enforce_df_int_typing,
return_one_dataframe_row_only,
)

INTEGER_COLUMNS = [
"prior_exposures_to_image_set",
"ecephys_session_id",
"unit_count",
"probe_count",
"channel_count",
]

class VisualBehaviorNeuropixelsProjectCloudApi(ProjectCloudApiBase):
MANIFEST_COMPATIBILITY = ["0.1.0", "10.0.0"]

def _load_manifest_tables(self):

self._get_ecephys_session_table()
self._get_behavior_session_table()
self._get_unit_table()
@@ -44,36 +54,22 @@ def get_behavior_session(
-------
BehaviorSession
"""
-row = self._behavior_session_table.query(
-f"behavior_session_id=={behavior_session_id}"
+row = return_one_dataframe_row_only(
+input_table=self._behavior_session_table,
+index_value=behavior_session_id,
+table_name="behavior_session_table",
)
-if row.shape[0] != 1:
-raise RuntimeError(
-"The behavior_session_table should have "
-"1 and only 1 entry for a given "
-"behavior_session_id. For "
-f"{behavior_session_id} "
-f" there are {row.shape[0]} entries."
-)

row = row.squeeze()
ecephys_session_id = row.ecephys_session_id
# If a file_id for the behavior session is not set, attempt to load
# an associated ecephys session.
if row[self.cache.file_id_column] < 0 or np.isnan(
row[self.cache.file_id_column]
):
-row = self._ecephys_session_table.query(
-f"index=={ecephys_session_id}"
-)
-
-if len(row) == 0:
-raise RuntimeError(
-f"behavior_session: {behavior_session_id} "
-f"corresponding to "
-f"ecephys_session: {ecephys_session_id}"
-f"does not exist in the behavior_session "
-"or ecephys_session tables."
+row = return_one_dataframe_row_only(
+input_table=self._ecephys_session_table,
+index_value=ecephys_session_id,
+table_name="ecephys_session_table",
)

file_id = str(int(row[self.cache.file_id_column]))
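
Note: the inline query-plus-RuntimeError blocks removed above are replaced by return_one_dataframe_row_only from allensdk.core.dataframe_utils. The helper's body is not part of this diff; a rough sketch of the behavior implied by the call sites (look up one index value, raise unless exactly one row matches, return that row) could look like the following, where the body and error wording are assumptions:

import pandas as pd


def return_one_dataframe_row_only_sketch(
    input_table: pd.DataFrame, index_value, table_name: str
) -> pd.DataFrame:
    # Select the rows whose index equals the requested value.
    row = input_table.loc[input_table.index == index_value]
    if row.shape[0] != 1:
        # Centralizes the exactly-one-row check that each call site used to restate.
        raise RuntimeError(
            f"The {table_name} should have 1 and only 1 entry for "
            f"index value {index_value}; there are {row.shape[0]} entries."
        )
    return row

At the call sites above, the helper is invoked with keyword arguments (input_table=..., index_value=..., table_name=...), so the uniqueness requirement lives in one place instead of being repeated next to every query.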
@@ -84,7 +80,6 @@ def get_behavior_session(
def get_ecephys_session(
self, ecephys_session_id: int
) -> BehaviorEcephysSession:

"""get a BehaviorEcephysSession by specifying ecephys_session_id
Parameters
@@ -97,21 +92,15 @@ def get_ecephys_session(
BehaviorEcephysSession
"""
-session_meta = self._ecephys_session_table.query(
-f"index=={ecephys_session_id}"
+session_meta = return_one_dataframe_row_only(
+input_table=self._ecephys_session_table,
+index_value=ecephys_session_id,
+table_name="ecephys_session_table",
)
probes_meta = self._probe_table[
(self._probe_table["ecephys_session_id"] == ecephys_session_id)
& (self._probe_table["has_lfp_data"])
]
-if session_meta.shape[0] != 1:
-raise RuntimeError(
-"The behavior_ecephys_session_table should "
-"have 1 and only 1 entry for a given "
-f"ecephys_session_id. For "
-f"{ecephys_session_id} "
-f" there are {session_meta.shape[0]} entries."
-)
session_file_id = str(int(session_meta[self.cache.file_id_column]))
session_data_path = self._get_data_path(file_id=session_file_id)

@@ -133,10 +122,9 @@ def f():
probe_meta = {
p.name: ProbeWithLFPMeta(
lfp_csd_filepath=make_lazy_load_filepath_function(
-file_id=str(int(getattr(
-p, self.cache.file_id_column)))
-),
-lfp_sampling_rate=p.lfp_sampling_rate
+file_id=str(int(getattr(p, self.cache.file_id_column)))
+),
+lfp_sampling_rate=p.lfp_sampling_rate,
)
for p in probes_meta.itertuples(index=False)
}
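
Note: ProbeWithLFPMeta is built with lfp_csd_filepath bound to a zero-argument function rather than a concrete path, so the LFP/CSD file only needs to be resolved (and, for a cloud cache, downloaded) when it is actually requested. make_lazy_load_filepath_function is not shown in this diff; a hedged sketch of the general closure pattern, with an invented resolver standing in for the real path lookup, is:

from typing import Callable


def make_lazy_load_filepath_function_sketch(
    file_id: str, resolve: Callable[[str], str]
) -> Callable[[], str]:
    # 'resolve' is a placeholder for whatever maps a file_id to a local path
    # (for example a cloud-cache download); it is not the AllenSDK API.
    def f() -> str:
        return resolve(file_id)

    return f


# Usage: nothing is resolved until the returned function is called.
lazy_path = make_lazy_load_filepath_function_sketch("12345", lambda fid: f"/tmp/{fid}.nwb")
print(lazy_path())  # /tmp/12345.nwb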
@@ -149,6 +137,7 @@ def f():
def _get_ecephys_session_table(self):
session_table_path = self._get_metadata_path(fname="ecephys_sessions")
df = pd.read_csv(session_table_path)
df = enforce_df_int_typing(df, INTEGER_COLUMNS, use_pandas_type=True)
self._ecephys_session_table = df.set_index("ecephys_session_id")
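
Note: enforce_df_int_typing is applied to the session tables with use_pandas_type=True so that the INTEGER_COLUMNS listed at the top of this file stay integer-typed even when some values are missing. The helper's body is not part of this diff; a sketch of the likely idea, using pandas' nullable Int64 dtype, is:

from typing import List

import pandas as pd


def enforce_df_int_typing_sketch(df: pd.DataFrame, int_columns: List[str]) -> pd.DataFrame:
    # Cast only the integer columns that are actually present in this table.
    for col in set(int_columns) & set(df.columns):
        # The nullable Int64 dtype keeps columns with missing values from
        # silently becoming float, which is roughly what use_pandas_type=True
        # appears to request; the real helper may handle more cases.
        df[col] = df[col].astype("Int64")
    return df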

def get_ecephys_session_table(self) -> pd.DataFrame:
@@ -161,6 +150,7 @@ def get_ecephys_session_table(self) -> pd.DataFrame:
def _get_behavior_session_table(self):
session_table_path = self._get_metadata_path(fname="behavior_sessions")
df = pd.read_csv(session_table_path)
df = enforce_df_int_typing(df, INTEGER_COLUMNS, use_pandas_type=True)
self._behavior_session_table = df.set_index("behavior_session_id")

def get_behavior_session_table(self) -> pd.DataFrame:
@@ -13,16 +13,23 @@
from allensdk.brain_observatory.behavior.behavior_session import (
BehaviorSession,
)
-from allensdk.core.utilities import literal_col_eval
from allensdk.core.dataframe_utils import (
-enforce_df_int_typing
+enforce_df_int_typing,
+return_one_dataframe_row_only,
)
+from allensdk.core.utilities import literal_col_eval

COL_EVAL_LIST = ["ophys_experiment_id", "ophys_container_id", "driver_line"]
INTEGER_COLUMNS = ["session_number", "prior_exposures_to_image_set",
"ophys_session_id", "imaging_plane_group_count",
"imaging_plane_group", "targeted_areas",
"num_depths_per_area", "num_targeted_structures"]
INTEGER_COLUMNS = [
"session_number",
"prior_exposures_to_image_set",
"ophys_session_id",
"imaging_plane_group_count",
"imaging_plane_group",
"targeted_areas",
"num_depths_per_area",
"num_targeted_structures",
]
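
Note: COL_EVAL_LIST names columns that arrive from the metadata CSVs as string-encoded Python literals (the code below indexes row.ophys_experiment_id[0], so that column holds a list per row), and literal_col_eval from allensdk.core.utilities turns them back into Python objects. That helper is not shown here; a hedged sketch of the idea, with an assumed signature, is:

import ast
from typing import List

import pandas as pd


def literal_col_eval_sketch(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
    # Parse string-encoded literals such as "[1234, 5678]" back into lists,
    # leaving values that are already parsed (or missing) untouched.
    def eval_cell(value):
        return ast.literal_eval(value) if isinstance(value, str) else value

    for col in columns:
        if col in df.columns:
            df[col] = df[col].apply(eval_cell)
    return df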


def sanitize_data_columns(
@@ -103,23 +110,23 @@ def get_behavior_session(
from the nwb file for the first-listed ophys_experiment.
"""
-row = self._behavior_session_table.query(
-f"behavior_session_id=={behavior_session_id}"
+row = return_one_dataframe_row_only(
+input_table=self._behavior_session_table,
+index_value=behavior_session_id,
+table_name="behavior_session_table",
)
-if row.shape[0] != 1:
-raise RuntimeError(
-"The behavior_session_table should have "
-"1 and only 1 entry for a given "
-"behavior_session_id. For "
-f"{behavior_session_id} "
-f" there are {row.shape[0]} entries."
-)
row = row.squeeze()
-has_file_id = (not pd.isna(row[self.cache.file_id_column])
-and row[self.cache.file_id_column] > 0)
+has_file_id = (
+not pd.isna(row[self.cache.file_id_column])
+and row[self.cache.file_id_column] > 0
+)
if not has_file_id:
oeid = row.ophys_experiment_id[0]
-row = self._ophys_experiment_table.query(f"index=={oeid}")
+row = return_one_dataframe_row_only(
+input_table=self._ophys_experiment_table,
+index_value=oeid,
+table_name="ophys_experiment_table",
+)
file_id = str(int(row[self.cache.file_id_column]))
data_path = self._get_data_path(file_id=file_id)
return BehaviorSession.from_nwb_path(nwb_path=str(data_path))
@@ -139,17 +146,11 @@ def get_behavior_ophys_experiment(
BehaviorOphysExperiment
"""
-row = self._ophys_experiment_table.query(
-f"index=={ophys_experiment_id}"
+row = return_one_dataframe_row_only(
+input_table=self._ophys_experiment_table,
+index_value=ophys_experiment_id,
+table_name="ophys_experiment_table",
)
-if row.shape[0] != 1:
-raise RuntimeError(
-"The behavior_ophys_experiment_table should "
-"have 1 and only 1 entry for a given "
-f"ophys_experiment_id. For "
-f"{ophys_experiment_id} "
-f" there are {row.shape[0]} entries."
-)
file_id = str(int(row[self.cache.file_id_column]))
data_path = self._get_data_path(file_id=file_id)
return BehaviorOphysExperiment.from_nwb_path(str(data_path))
Expand Up @@ -171,6 +171,7 @@ def __get_prior_exposure_count(
elif agg_method == "cumsum":
df["to"] = to
df_index_name = df.index.name

def cumsum(x):
return x.cumsum().shift(fill_value=0).astype("int64")
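
Note: the nested cumsum helper is what turns a per-group running total into a count of prior exposures: cumulative-summing within a group and then shifting down one row (filling the first row with 0) gives, for each session, how many qualifying sessions the subject had already seen before it. A small worked example, independent of the AllenSDK tables:

import pandas as pd

# One subject, four image-set sessions in chronological order.
df = pd.DataFrame({"subject": ["a", "a", "a", "a"], "exposed": [1, 1, 1, 1]})


def cumsum(x):
    # Running total shifted down one row: "exposures before this session".
    return x.cumsum().shift(fill_value=0).astype("int64")


prior = df.groupby("subject")["exposed"].transform(cumsum)
print(prior.tolist())  # [0, 1, 2, 3]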

@@ -184,8 +185,7 @@ def cumsum(x):
return counts.reindex(index)


-def add_experience_level_ophys(
-input_df: pd.DataFrame) -> pd.DataFrame:
+def add_experience_level_ophys(input_df: pd.DataFrame) -> pd.DataFrame:
"""
adds a column to ophys tables that contains a string
indicating whether a session had exposure level of Familiar,
@@ -210,36 +210,34 @@ def add_experience_level_ophys(

# do not modify in place
table = input_df.copy(deep=True)
-session_number = 'session_number' \
-if 'session_number' in table.columns else 'session'
+session_number = (
+"session_number" if "session_number" in table.columns else "session"
+)

# add experience_level column with strings indicating relevant conditions
-table['experience_level'] = 'None'
+table["experience_level"] = "None"

-session_training = table.session_type.str.startswith('TRAINING')
+session_training = table.session_type.str.startswith("TRAINING")
train_indices = table[session_training].index.values
-table.loc[train_indices, 'experience_level'] = 'Training'
+table.loc[train_indices, "experience_level"] = "Training"

session_0123 = table[session_number].isin([0, 1, 2, 3])
familiar_indices = table[session_0123].index.values

-table.loc[familiar_indices, 'experience_level'] = 'Familiar'
+table.loc[familiar_indices, "experience_level"] = "Familiar"

session_456 = table[session_number].isin([4, 5, 6])
-zero_prior_exp = (table.prior_exposures_to_image_set == 0)
+zero_prior_exp = table.prior_exposures_to_image_set == 0

-novel_indices = table[session_456
-& zero_prior_exp].index.values
+novel_indices = table[session_456 & zero_prior_exp].index.values

-table.loc[novel_indices, 'experience_level'] = 'Novel 1'
+table.loc[novel_indices, "experience_level"] = "Novel 1"

session_456 = table[session_number].isin([4, 5, 6])
-nonzero_prior_exp = (table.prior_exposures_to_image_set != 0)
-novel_gt_1_indices = table[
-session_456
-& nonzero_prior_exp].index.values
+nonzero_prior_exp = table.prior_exposures_to_image_set != 0
+novel_gt_1_indices = table[session_456 & nonzero_prior_exp].index.values

-table.loc[novel_gt_1_indices, 'experience_level'] = 'Novel >1'
+table.loc[novel_gt_1_indices, "experience_level"] = "Novel >1"

return table
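
Note: the rewrite above is purely stylistic; the labelling rules are unchanged: session_type values starting with TRAINING are Training, session numbers 0-3 are Familiar, session numbers 4-6 with zero prior exposures to the image set are Novel 1, and session numbers 4-6 with nonzero prior exposures are Novel >1. A small illustration with made-up rows, assuming add_experience_level_ophys is in scope (its module path is not visible in this diff):

import pandas as pd

# Made-up sessions covering each branch of the labelling logic above.
sessions = pd.DataFrame(
    {
        "session_type": ["TRAINING_1_gratings", "OPHYS_1_images_A",
                         "OPHYS_4_images_B", "OPHYS_6_images_B"],
        "session_number": [None, 1, 4, 6],
        "prior_exposures_to_image_set": [0, 5, 0, 3],
    }
)

labelled = add_experience_level_ophys(sessions)
print(labelled["experience_level"].tolist())
# ['Training', 'Familiar', 'Novel 1', 'Novel >1']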

