Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement find_full_path within ephys modules #35

Merged
merged 25 commits into from
Jan 11, 2022
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
f69e491
Update .gitignore
kabilar May 1, 2021
1ce53f3
Merge branch 'main' of https://github.com/datajoint/element-array-eph…
kabilar May 1, 2021
6472c19
Merge branch 'main' of https://github.com/datajoint/element-array-eph…
kabilar Aug 18, 2021
a0f49d2
Merge branch 'main' of https://github.com/datajoint/element-array-eph…
kabilar Sep 17, 2021
4f4be8d
Move functions to `element-data-loader`
kabilar Sep 20, 2021
ffaf60b
Add element_data_loader for multiple root dirs
kabilar Sep 27, 2021
b6b39c0
Update author
kabilar Sep 27, 2021
2be1f08
Fix import
kabilar Sep 28, 2021
68ef14b
[WIP] Print directory path
kabilar Sep 28, 2021
2233c5d
Fix OpenEphys session path
kabilar Sep 28, 2021
ab426c1
Update comments
kabilar Sep 28, 2021
49c554b
[WIP] Update directory path
kabilar Sep 28, 2021
b98192b
[WIP] Add print statement
kabilar Sep 28, 2021
cf533a2
Remove test print statement
kabilar Sep 29, 2021
44be355
Fix module import
kabilar Sep 30, 2021
139e99b
Update module import
kabilar Oct 4, 2021
9881350
Fixed doc string
kabilar Oct 4, 2021
818cc53
Update module import
kabilar Oct 4, 2021
665cc28
Fix for missing `fileTimeSecs`
kabilar Oct 4, 2021
84bb616
[WIP] Add print statement
kabilar Oct 4, 2021
1a4a7f5
Remove print statement
kabilar Oct 4, 2021
4ca9b32
Suggested adds re upstream components
CBroz1 Dec 30, 2021
09e8a96
Update error message
kabilar Jan 3, 2022
ce6adf1
Merge branch 'main' of https://github.com/kabilar/element-array-ephys…
kabilar Jan 3, 2022
6f9507c
Rename package
kabilar Jan 11, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# User data
.DS_Store

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
27 changes: 18 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# DataJoint Element - Array Electrophysiology Element
DataJoint Element for array electrophysiology.

This repository features DataJoint pipeline design for extracellular array electrophysiology,
with ***Neuropixels*** probe and ***kilosort*** spike sorting method.
Expand Down Expand Up @@ -45,14 +44,24 @@ This ephys element features automatic ingestion for spike sorting results from t
+ ***WaveformSet*** - A set of spike waveforms for units from a given CuratedClustering

## Installation
```
pip install element-array-ephys
```

If you already have an older version of ***element-array-ephys*** installed using `pip`, upgrade with
```
pip install --upgrade element-array-ephys
```

+ Install `element-array-ephys`
```
pip install element-array-ephys
```

+ Upgrade `element-array-ephys` previously installed with `pip`
```
pip install --upgrade element-array-ephys
```

+ Install `element-data-loader`

+ `element-data-loader` is a dependency of `element-array-ephys`, however it is not contained within `requirements.txt`.
CBroz1 marked this conversation as resolved.
Show resolved Hide resolved

```
pip install "element-data-loader @ git+https://github.com/datajoint/element-data-loader"
```

## Usage

Expand Down
69 changes: 0 additions & 69 deletions element_array_ephys/__init__.py
Original file line number Diff line number Diff line change
@@ -1,69 +0,0 @@
import datajoint as dj
import pathlib
import uuid
import hashlib


dj.config['enable_python_native_blobs'] = True


def find_full_path(root_directories, relative_path):
    """
    Given a relative path, search and return the full path
    from the provided potential root directories (in the given order)
    :param root_directories: potential root directories (a single directory
                             as str/pathlib.Path, or an iterable of them)
    :param relative_path: the relative path to resolve against the roots
    :return: full path (pathlib.Path object)
    :raises FileNotFoundError: if the path exists under none of the roots
    """
    relative_path = pathlib.Path(relative_path)

    # If the provided path already resolves on its own
    # (i.e. it was in fact absolute, or relative to CWD), use it as-is
    if relative_path.exists():
        return relative_path

    # turn to list if only a single root directory is provided
    if isinstance(root_directories, (str, pathlib.Path)):
        root_directories = [root_directories]

    for root_dir in root_directories:
        # build the candidate once per root, instead of twice
        full_path = pathlib.Path(root_dir) / relative_path
        if full_path.exists():
            return full_path

    raise FileNotFoundError('No valid full-path found (from {})'
                            ' for {}'.format(root_directories, relative_path))


def find_root_directory(root_directories, full_path):
    """
    Given multiple potential root directories and a full path,
    search and return the one root directory that is a parent of the given path
    :param root_directories: potential root directories (a single directory
                             as str/pathlib.Path, or an iterable of them)
    :param full_path: the full path whose root directory is to be found
    :return: root directory (pathlib.Path object)
    :raises FileNotFoundError: if `full_path` does not exist, or if no
                               root directory is a parent of it
    """
    full_path = pathlib.Path(full_path)

    if not full_path.exists():
        raise FileNotFoundError(f'{full_path} does not exist!')

    # turn to list if only a single root directory is provided
    if isinstance(root_directories, (str, pathlib.Path)):
        root_directories = [root_directories]

    try:
        # first root (in the given order) that is an ancestor of full_path
        return next(pathlib.Path(root_dir) for root_dir in root_directories
                    if pathlib.Path(root_dir) in set(full_path.parents))

    except StopIteration:
        raise FileNotFoundError('No valid root directory found (from {})'
                                ' for {}'.format(root_directories, full_path))


def dict_to_uuid(key):
    """
    Deterministically map a dictionary `key` to a UUID built from the
    MD5 digest of its (key, value) pairs, taken in sorted-key order
    so that two dicts with the same contents yield the same UUID
    """
    digest = hashlib.md5()
    for attr, value in sorted(key.items()):
        digest.update(str(attr).encode())
        digest.update(str(value).encode())
    return uuid.UUID(hex=digest.hexdigest())
62 changes: 37 additions & 25 deletions element_array_ephys/ephys.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
import numpy as np
import inspect
import importlib
from element_data_loader.utils import find_root_directory, find_full_path, dict_to_uuid

from .readers import spikeglx, kilosort, openephys
from . import probe, find_full_path, find_root_directory, dict_to_uuid
from . import probe

schema = dj.schema()

Expand Down Expand Up @@ -46,7 +47,6 @@ def activate(ephys_schema_name, probe_schema_name=None, *, create_schema=True,
global _linking_module
_linking_module = linking_module

# activate
probe.activate(probe_schema_name, create_schema=create_schema,
create_tables=create_tables)
schema.activate(ephys_schema_name, create_schema=create_schema,
Expand All @@ -57,9 +57,10 @@ def activate(ephys_schema_name, probe_schema_name=None, *, create_schema=True,

def get_ephys_root_data_dir() -> list:
"""
All data paths, directories in DataJoint Elements are recommended to be stored as
relative paths, with respect to some user-configured "root" directory,
which varies from machine to machine (e.g. different mounted drive locations)
All data paths, directories in DataJoint Elements are recommended to be
stored as relative paths, with respect to some user-configured "root"
directory, which varies from machine to machine (e.g. different mounted
drive locations)

get_ephys_root_data_dir() -> list
This user-provided function retrieves the possible root data directories
Expand All @@ -78,7 +79,7 @@ def get_session_directory(session_key: dict) -> str:
Retrieve the session directory containing the
recorded Neuropixels data for a given Session
:param session_key: a dictionary of one Session `key`
:return: a string for full path to the session directory
:return: a string for relative or full path to the session directory
"""
return _linking_module.get_session_directory(session_key)

Expand Down Expand Up @@ -140,14 +141,16 @@ class EphysFile(dj.Part):
"""

def make(self, key):
sess_dir = pathlib.Path(get_session_directory(key))

session_dir = find_full_path(get_ephys_root_data_dir(),
get_session_directory(key))

inserted_probe_serial_number = (ProbeInsertion * probe.Probe & key).fetch1('probe')

# search session dir and determine acquisition software
for ephys_pattern, ephys_acq_type in zip(['*.ap.meta', '*.oebin'],
['SpikeGLX', 'Open Ephys']):
ephys_meta_filepaths = [fp for fp in sess_dir.rglob(ephys_pattern)]
ephys_meta_filepaths = [fp for fp in session_dir.rglob(ephys_pattern)]
if ephys_meta_filepaths:
acq_software = ephys_acq_type
break
kabilar marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -187,12 +190,13 @@ def make(self, key):
'acq_software': acq_software,
'sampling_rate': spikeglx_meta.meta['imSampRate']})

root_dir = find_root_directory(get_ephys_root_data_dir(), meta_filepath)
root_dir = find_root_directory(get_ephys_root_data_dir(),
meta_filepath)
self.EphysFile.insert1({
**key,
'file_path': meta_filepath.relative_to(root_dir).as_posix()})
elif acq_software == 'Open Ephys':
dataset = openephys.OpenEphys(sess_dir)
dataset = openephys.OpenEphys(session_dir)
for serial_number, probe_data in dataset.probes.items():
if str(serial_number) == inserted_probe_serial_number:
break
Comment on lines 201 to 203
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for serial_number, probe_data in dataset.probes.items():
if str(serial_number) == inserted_probe_serial_number:
break
for serial_number in dataset.probes:
if str(serial_number) == inserted_probe_serial_number:
break

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable probe_data is used in the next section after the for loop, so we would need to keep this statement as is.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then I would get probe_data after the loop

Suggested change
for serial_number, probe_data in dataset.probes.items():
if str(serial_number) == inserted_probe_serial_number:
break
for serial_number in dataset.probes:
if str(serial_number) == inserted_probe_serial_number:
probe_data = dataset.probes[serial_number]
break

Expand Down Expand Up @@ -220,8 +224,7 @@ def make(self, key):
'acq_software': acq_software,
'sampling_rate': probe_data.ap_meta['sample_rate']})

root_dir = find_root_directory(
get_ephys_root_data_dir(),
root_dir = find_root_directory(get_ephys_root_data_dir(),
probe_data.recording_info['recording_files'][0])
self.EphysFile.insert([{**key,
'file_path': fp.relative_to(root_dir).as_posix()}
Expand Down Expand Up @@ -290,8 +293,11 @@ def make(self, key):
shank, shank_col, shank_row, _ = spikeglx_recording.apmeta.shankmap['data'][recorded_site]
electrode_keys.append(probe_electrodes[(shank, shank_col, shank_row)])
elif acq_software == 'Open Ephys':
sess_dir = pathlib.Path(get_session_directory(key))
loaded_oe = openephys.OpenEphys(sess_dir)

session_dir = find_full_path(get_ephys_root_data_dir(),
get_session_directory(key))

loaded_oe = openephys.OpenEphys(session_dir)
oe_probe = loaded_oe.probes[probe_sn]

lfp_channel_ind = np.arange(
Expand Down Expand Up @@ -442,16 +448,16 @@ class Curation(dj.Manual):
curation_id: int
---
curation_time: datetime # time of generation of this set of curated clustering results
curation_output_dir: varchar(255) # output directory of the curated results, relative to clustering root data directory
curation_output_dir: varchar(255) # output directory of the curated results, relative to root data directory
quality_control: bool # has this clustering result undergone quality control?
manual_curation: bool # has manual curation been performed on this clustering result?
curation_note='': varchar(2000)
"""

def create1_from_clustering_task(self, key, curation_note=''):
"""
A convenient function to create a new corresponding "Curation"
for a particular "ClusteringTask"
A function to create a new corresponding "Curation" for a particular
"ClusteringTask"
"""
if key not in Clustering():
raise ValueError(f'No corresponding entry in Clustering available'
Expand All @@ -465,8 +471,10 @@ def create1_from_clustering_task(self, key, curation_note=''):
# Synthesize curation_id
curation_id = dj.U().aggr(self & key, n='ifnull(max(curation_id)+1,1)').fetch1('n')
self.insert1({**key, 'curation_id': curation_id,
'curation_time': creation_time, 'curation_output_dir': output_dir,
'quality_control': is_qc, 'manual_curation': is_curated,
'curation_time': creation_time,
'curation_output_dir': output_dir,
'quality_control': is_qc,
'manual_curation': is_curated,
'curation_note': curation_note})


Expand Down Expand Up @@ -613,8 +621,9 @@ def yield_unit_waveforms():
spikeglx_meta_filepath = get_spikeglx_meta_filepath(key)
neuropixels_recording = spikeglx.SpikeGLX(spikeglx_meta_filepath.parent)
elif acq_software == 'Open Ephys':
sess_dir = pathlib.Path(get_session_directory(key))
openephys_dataset = openephys.OpenEphys(sess_dir)
session_dir = find_full_path(get_ephys_root_data_dir(),
get_session_directory(key))
openephys_dataset = openephys.OpenEphys(session_dir)
neuropixels_recording = openephys_dataset.probes[probe_serial_number]

def yield_unit_waveforms():
Expand Down Expand Up @@ -659,11 +668,13 @@ def get_spikeglx_meta_filepath(ephys_recording_key):
except FileNotFoundError:
# if not found, search in session_dir again
if not spikeglx_meta_filepath.exists():
sess_dir = pathlib.Path(get_session_directory(ephys_recording_key))
session_dir = find_full_path(get_ephys_root_data_dir(),
get_session_directory(
ephys_recording_key))
inserted_probe_serial_number = (ProbeInsertion * probe.Probe
& ephys_recording_key).fetch1('probe')

spikeglx_meta_filepaths = [fp for fp in sess_dir.rglob('*.ap.meta')]
spikeglx_meta_filepaths = [fp for fp in session_dir.rglob('*.ap.meta')]
for meta_filepath in spikeglx_meta_filepaths:
spikeglx_meta = spikeglx.SpikeGLXMeta(meta_filepath)
if str(spikeglx_meta.probe_SN) == inserted_probe_serial_number:
Expand Down Expand Up @@ -696,8 +707,9 @@ def get_neuropixels_channel2electrode_map(ephys_recording_key, acq_software):
for recorded_site, (shank, shank_col, shank_row, _) in enumerate(
spikeglx_meta.shankmap['data'])}
elif acq_software == 'Open Ephys':
sess_dir = pathlib.Path(get_session_directory(ephys_recording_key))
openephys_dataset = openephys.OpenEphys(sess_dir)
session_dir = find_full_path(get_ephys_root_data_dir(),
get_session_directory(ephys_recording_key))
openephys_dataset = openephys.OpenEphys(session_dir)
probe_serial_number = (ProbeInsertion & ephys_recording_key).fetch1('probe')
probe_dataset = openephys_dataset.probes[probe_serial_number]

Expand Down
Loading