diff --git a/.gitignore b/.gitignore index f83f9435..d1782d5c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# User data +.DS_Store + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/README.md b/README.md index 802db8bb..27a39c27 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ # DataJoint Element - Array Electrophysiology Element -DataJoint Element for array electrophysiology. This repository features DataJoint pipeline design for extracellular array electrophysiology, with ***Neuropixels*** probe and ***kilosort*** spike sorting method. @@ -13,12 +12,16 @@ ephys pipeline. See [Background](Background.md) for the background information and development timeline. -## The Pipeline Architecture +## Element architecture ![element-array-ephys diagram](images/attached_array_ephys_element.svg) As the diagram depicts, the array ephys element starts immediately downstream from ***Session***, -and also requires some notion of ***Location*** as a dependency for ***InsertionLocation***. +and also requires some notion of ***Location*** as a dependency for ***InsertionLocation***. We +provide an [example workflow](https://github.com/datajoint/workflow-array-ephys/) with a +[pipeline script](https://github.com/datajoint/workflow-array-ephys/blob/main/workflow_array_ephys/pipeline.py) +that models (a) combining this Element with the corresponding [Element-Session](https://github.com/datajoint/element-session) +, and (b) declaring a ***SkullReference*** table to provide Location. ### The design of probe @@ -45,14 +48,24 @@ This ephys element features automatic ingestion for spike sorting results from t + ***WaveformSet*** - A set of spike waveforms for units from a given CuratedClustering ## Installation -``` -pip install element-array-ephys -``` -If you already have an older version of ***element-array-ephys*** installed using `pip`, upgrade with -``` -pip install --upgrade element-array-ephys -``` ++ Install `element-array-ephys` + ``` + pip install element-array-ephys + ``` + ++ Upgrade `element-array-ephys` previously installed with `pip` + ``` + pip install --upgrade element-array-ephys + ``` + ++ Install `element-interface` + + + `element-interface` is a dependency of `element-array-ephys`, however it is not contained within `requirements.txt`. + + ``` + pip install "element-interface @ git+https://github.com/datajoint/element-interface" + ``` ## Usage @@ -65,12 +78,12 @@ To activate the `element-array-ephys`, ones need to provide: + schema name for the ephys module 2. Upstream tables - + Session table - + SkullReference table (Reference table for InsertionLocation, specifying the skull reference) + + Session table: A set of keys identifying a recording session (see [Element-Session](https://github.com/datajoint/element-session)). + + SkullReference table: A reference table for InsertionLocation, specifying the skull reference (see [example pipeline](https://github.com/datajoint/workflow-array-ephys/blob/main/workflow_array_ephys/pipeline.py)). -3. Utility functions - + get_ephys_root_data_dir() - + get_session_directory() +3. Utility functions. See [example definitions here](https://github.com/datajoint/workflow-array-ephys/blob/main/workflow_array_ephys/paths.py) + + get_ephys_root_data_dir(): Returns your root data directory. + + get_session_directory(): Returns the path of the session data relative to the root. 
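For illustration only, a minimal sketch of these two user-provided functions might look like the block below. The `dj.config['custom']['ephys_root_data_dir']` key and the `SessionDirectory` lookup are assumptions made for this sketch; see the example definitions linked above for working versions.

```python
import pathlib
import datajoint as dj


def get_ephys_root_data_dir() -> list:
    # Assumed config layout: one or more root paths stored under a 'custom'
    # section of dj.config
    root_dirs = dj.config['custom'].get('ephys_root_data_dir', [])
    if isinstance(root_dirs, (str, pathlib.Path)):
        root_dirs = [root_dirs]
    return [pathlib.Path(d) for d in root_dirs]


def get_session_directory(session_key: dict) -> str:
    # Assumed lookup: the path of each session, relative to a root directory,
    # is recorded in a SessionDirectory table declared in the workflow
    from workflow_array_ephys.pipeline import session
    return (session.SessionDirectory & session_key).fetch1('session_dir')
```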
For more detail, check the docstring of the `element-array-ephys`: diff --git a/element_array_ephys/__init__.py b/element_array_ephys/__init__.py index 3637d2e5..e69de29b 100644 --- a/element_array_ephys/__init__.py +++ b/element_array_ephys/__init__.py @@ -1,69 +0,0 @@ -import datajoint as dj -import pathlib -import uuid -import hashlib - - -dj.config['enable_python_native_blobs'] = True - - -def find_full_path(root_directories, relative_path): - """ - Given a relative path, search and return the full-path - from provided potential root directories (in the given order) - :param root_directories: potential root directories - :param relative_path: the relative path to find the valid root directory - :return: root_directory (pathlib.Path object) - """ - relative_path = pathlib.Path(relative_path) - - if relative_path.exists(): - return relative_path - - # turn to list if only a single root directory is provided - if isinstance(root_directories, (str, pathlib.Path)): - root_directories = [root_directories] - - for root_dir in root_directories: - if (pathlib.Path(root_dir) / relative_path).exists(): - return pathlib.Path(root_dir) / relative_path - - raise FileNotFoundError('No valid full-path found (from {})' - ' for {}'.format(root_directories, relative_path)) - - -def find_root_directory(root_directories, full_path): - """ - Given multiple potential root directories and a full-path, - search and return one directory that is the parent of the given path - :param root_directories: potential root directories - :param full_path: the relative path to search the root directory - :return: full-path (pathlib.Path object) - """ - full_path = pathlib.Path(full_path) - - if not full_path.exists(): - raise FileNotFoundError(f'{full_path} does not exist!') - - # turn to list if only a single root directory is provided - if isinstance(root_directories, (str, pathlib.Path)): - root_directories = [root_directories] - - try: - return next(pathlib.Path(root_dir) for root_dir in root_directories - if pathlib.Path(root_dir) in set(full_path.parents)) - - except StopIteration: - raise FileNotFoundError('No valid root directory found (from {})' - ' for {}'.format(root_directories, full_path)) - - -def dict_to_uuid(key): - """ - Given a dictionary `key`, returns a hash string as UUID - """ - hashed = hashlib.md5() - for k, v in sorted(key.items()): - hashed.update(str(k).encode()) - hashed.update(str(v).encode()) - return uuid.UUID(hex=hashed.hexdigest()) \ No newline at end of file diff --git a/element_array_ephys/ephys.py b/element_array_ephys/ephys.py index 3eec7842..a434d158 100644 --- a/element_array_ephys/ephys.py +++ b/element_array_ephys/ephys.py @@ -4,9 +4,10 @@ import numpy as np import inspect import importlib +from element_interface.utils import find_root_directory, find_full_path, dict_to_uuid from .readers import spikeglx, kilosort, openephys -from . import probe, find_full_path, find_root_directory, dict_to_uuid +from . 
import probe schema = dj.schema() @@ -46,7 +47,6 @@ def activate(ephys_schema_name, probe_schema_name=None, *, create_schema=True, global _linking_module _linking_module = linking_module - # activate probe.activate(probe_schema_name, create_schema=create_schema, create_tables=create_tables) schema.activate(ephys_schema_name, create_schema=create_schema, @@ -57,9 +57,10 @@ def activate(ephys_schema_name, probe_schema_name=None, *, create_schema=True, def get_ephys_root_data_dir() -> list: """ - All data paths, directories in DataJoint Elements are recommended to be stored as - relative paths, with respect to some user-configured "root" directory, - which varies from machine to machine (e.g. different mounted drive locations) + All data paths, directories in DataJoint Elements are recommended to be + stored as relative paths, with respect to some user-configured "root" + directory, which varies from machine to machine (e.g. different mounted + drive locations) get_ephys_root_data_dir() -> list This user-provided function retrieves the possible root data directories @@ -78,7 +79,7 @@ def get_session_directory(session_key: dict) -> str: Retrieve the session directory containing the recorded Neuropixels data for a given Session :param session_key: a dictionary of one Session `key` - :return: a string for full path to the session directory + :return: a string for relative or full path to the session directory """ return _linking_module.get_session_directory(session_key) @@ -140,21 +141,24 @@ class EphysFile(dj.Part): """ def make(self, key): - sess_dir = pathlib.Path(get_session_directory(key)) + + session_dir = find_full_path(get_ephys_root_data_dir(), + get_session_directory(key)) inserted_probe_serial_number = (ProbeInsertion * probe.Probe & key).fetch1('probe') # search session dir and determine acquisition software for ephys_pattern, ephys_acq_type in zip(['*.ap.meta', '*.oebin'], ['SpikeGLX', 'Open Ephys']): - ephys_meta_filepaths = [fp for fp in sess_dir.rglob(ephys_pattern)] + ephys_meta_filepaths = [fp for fp in session_dir.rglob(ephys_pattern)] if ephys_meta_filepaths: acq_software = ephys_acq_type break else: raise FileNotFoundError( f'Ephys recording data not found!' 
- f' Neither SpikeGLX nor Open Ephys recording files found') + f' Neither SpikeGLX nor Open Ephys recording files found' + f' in {session_dir}') if acq_software == 'SpikeGLX': for meta_filepath in ephys_meta_filepaths: @@ -187,12 +191,13 @@ def make(self, key): 'acq_software': acq_software, 'sampling_rate': spikeglx_meta.meta['imSampRate']}) - root_dir = find_root_directory(get_ephys_root_data_dir(), meta_filepath) + root_dir = find_root_directory(get_ephys_root_data_dir(), + meta_filepath) self.EphysFile.insert1({ **key, 'file_path': meta_filepath.relative_to(root_dir).as_posix()}) elif acq_software == 'Open Ephys': - dataset = openephys.OpenEphys(sess_dir) + dataset = openephys.OpenEphys(session_dir) for serial_number, probe_data in dataset.probes.items(): if str(serial_number) == inserted_probe_serial_number: break @@ -220,8 +225,7 @@ def make(self, key): 'acq_software': acq_software, 'sampling_rate': probe_data.ap_meta['sample_rate']}) - root_dir = find_root_directory( - get_ephys_root_data_dir(), + root_dir = find_root_directory(get_ephys_root_data_dir(), probe_data.recording_info['recording_files'][0]) self.EphysFile.insert([{**key, 'file_path': fp.relative_to(root_dir).as_posix()} @@ -290,8 +294,11 @@ def make(self, key): shank, shank_col, shank_row, _ = spikeglx_recording.apmeta.shankmap['data'][recorded_site] electrode_keys.append(probe_electrodes[(shank, shank_col, shank_row)]) elif acq_software == 'Open Ephys': - sess_dir = pathlib.Path(get_session_directory(key)) - loaded_oe = openephys.OpenEphys(sess_dir) + + session_dir = find_full_path(get_ephys_root_data_dir(), + get_session_directory(key)) + + loaded_oe = openephys.OpenEphys(session_dir) oe_probe = loaded_oe.probes[probe_sn] lfp_channel_ind = np.arange( @@ -442,7 +449,7 @@ class Curation(dj.Manual): curation_id: int --- curation_time: datetime # time of generation of this set of curated clustering results - curation_output_dir: varchar(255) # output directory of the curated results, relative to clustering root data directory + curation_output_dir: varchar(255) # output directory of the curated results, relative to root data directory quality_control: bool # has this clustering result undergone quality control? manual_curation: bool # has manual curation been performed on this clustering result? 
curation_note='': varchar(2000) @@ -450,8 +457,8 @@ class Curation(dj.Manual): def create1_from_clustering_task(self, key, curation_note=''): """ - A convenient function to create a new corresponding "Curation" - for a particular "ClusteringTask" + A function to create a new corresponding "Curation" for a particular + "ClusteringTask" """ if key not in Clustering(): raise ValueError(f'No corresponding entry in Clustering available' @@ -465,8 +472,10 @@ def create1_from_clustering_task(self, key, curation_note=''): # Synthesize curation_id curation_id = dj.U().aggr(self & key, n='ifnull(max(curation_id)+1,1)').fetch1('n') self.insert1({**key, 'curation_id': curation_id, - 'curation_time': creation_time, 'curation_output_dir': output_dir, - 'quality_control': is_qc, 'manual_curation': is_curated, + 'curation_time': creation_time, + 'curation_output_dir': output_dir, + 'quality_control': is_qc, + 'manual_curation': is_curated, 'curation_note': curation_note}) @@ -613,8 +622,9 @@ def yield_unit_waveforms(): spikeglx_meta_filepath = get_spikeglx_meta_filepath(key) neuropixels_recording = spikeglx.SpikeGLX(spikeglx_meta_filepath.parent) elif acq_software == 'Open Ephys': - sess_dir = pathlib.Path(get_session_directory(key)) - openephys_dataset = openephys.OpenEphys(sess_dir) + session_dir = find_full_path(get_ephys_root_data_dir(), + get_session_directory(key)) + openephys_dataset = openephys.OpenEphys(session_dir) neuropixels_recording = openephys_dataset.probes[probe_serial_number] def yield_unit_waveforms(): @@ -659,11 +669,13 @@ def get_spikeglx_meta_filepath(ephys_recording_key): except FileNotFoundError: # if not found, search in session_dir again if not spikeglx_meta_filepath.exists(): - sess_dir = pathlib.Path(get_session_directory(ephys_recording_key)) + session_dir = find_full_path(get_ephys_root_data_dir(), + get_session_directory( + ephys_recording_key)) inserted_probe_serial_number = (ProbeInsertion * probe.Probe & ephys_recording_key).fetch1('probe') - spikeglx_meta_filepaths = [fp for fp in sess_dir.rglob('*.ap.meta')] + spikeglx_meta_filepaths = [fp for fp in session_dir.rglob('*.ap.meta')] for meta_filepath in spikeglx_meta_filepaths: spikeglx_meta = spikeglx.SpikeGLXMeta(meta_filepath) if str(spikeglx_meta.probe_SN) == inserted_probe_serial_number: @@ -696,8 +708,9 @@ def get_neuropixels_channel2electrode_map(ephys_recording_key, acq_software): for recorded_site, (shank, shank_col, shank_row, _) in enumerate( spikeglx_meta.shankmap['data'])} elif acq_software == 'Open Ephys': - sess_dir = pathlib.Path(get_session_directory(ephys_recording_key)) - openephys_dataset = openephys.OpenEphys(sess_dir) + session_dir = find_full_path(get_ephys_root_data_dir(), + get_session_directory(ephys_recording_key)) + openephys_dataset = openephys.OpenEphys(session_dir) probe_serial_number = (ProbeInsertion & ephys_recording_key).fetch1('probe') probe_dataset = openephys_dataset.probes[probe_serial_number] diff --git a/element_array_ephys/ephys_chronic.py b/element_array_ephys/ephys_chronic.py index 95268443..16aa00d4 100644 --- a/element_array_ephys/ephys_chronic.py +++ b/element_array_ephys/ephys_chronic.py @@ -4,9 +4,10 @@ import numpy as np import inspect import importlib +from element_interface.utils import find_root_directory, find_full_path, dict_to_uuid from .readers import spikeglx, kilosort, openephys -from . import probe, find_full_path, find_root_directory, dict_to_uuid +from . 
import probe schema = dj.schema() @@ -47,7 +48,6 @@ def activate(ephys_schema_name, probe_schema_name=None, *, create_schema=True, global _linking_module _linking_module = linking_module - # activate probe.activate(probe_schema_name, create_schema=create_schema, create_tables=create_tables) schema.activate(ephys_schema_name, create_schema=create_schema, @@ -89,7 +89,7 @@ def get_session_directory(session_key: dict) -> str: @schema class AcquisitionSoftware(dj.Lookup): - definition = """ # Name of software used for recording of neuropixels probes - SpikeGLX or Open Ephys + definition = """ # Software used for recording of neuropixels probes acq_software: varchar(24) """ contents = zip(['SpikeGLX', 'Open Ephys']) @@ -143,21 +143,23 @@ class EphysFile(dj.Part): """ def make(self, key): - sess_dir = pathlib.Path(get_session_directory(key)) + session_dir = find_full_path(get_ephys_root_data_dir(), + get_session_directory(key)) inserted_probe_serial_number = (ProbeInsertion * probe.Probe & key).fetch1('probe') # search session dir and determine acquisition software for ephys_pattern, ephys_acq_type in zip(['*.ap.meta', '*.oebin'], ['SpikeGLX', 'Open Ephys']): - ephys_meta_filepaths = [fp for fp in sess_dir.rglob(ephys_pattern)] + ephys_meta_filepaths = [fp for fp in session_dir.rglob(ephys_pattern)] if ephys_meta_filepaths: acq_software = ephys_acq_type break else: raise FileNotFoundError( f'Ephys recording data not found!' - f' Neither SpikeGLX nor Open Ephys recording files found') + f' Neither SpikeGLX nor Open Ephys recording files found' + f' in {session_dir}') if acq_software == 'SpikeGLX': for meta_filepath in ephys_meta_filepaths: @@ -190,12 +192,13 @@ def make(self, key): 'acq_software': acq_software, 'sampling_rate': spikeglx_meta.meta['imSampRate']}) - root_dir = find_root_directory(get_ephys_root_data_dir(), meta_filepath) + root_dir = find_root_directory(get_ephys_root_data_dir(), + meta_filepath) self.EphysFile.insert1({ **key, 'file_path': meta_filepath.relative_to(root_dir).as_posix()}) elif acq_software == 'Open Ephys': - dataset = openephys.OpenEphys(sess_dir) + dataset = openephys.OpenEphys(session_dir) for serial_number, probe_data in dataset.probes.items(): if str(serial_number) == inserted_probe_serial_number: break @@ -223,9 +226,8 @@ def make(self, key): 'acq_software': acq_software, 'sampling_rate': probe_data.ap_meta['sample_rate']}) - root_dir = find_root_directory( - get_ephys_root_data_dir(), - probe_data.recording_info['recording_files'][0]) + root_dir = find_root_directory(get_ephys_root_data_dir(), + probe_data.recording_info['recording_files'][0]) self.EphysFile.insert([{**key, 'file_path': fp.relative_to(root_dir).as_posix()} for fp in probe_data.recording_info['recording_files']]) @@ -293,8 +295,9 @@ def make(self, key): shank, shank_col, shank_row, _ = spikeglx_recording.apmeta.shankmap['data'][recorded_site] electrode_keys.append(probe_electrodes[(shank, shank_col, shank_row)]) elif acq_software == 'Open Ephys': - sess_dir = pathlib.Path(get_session_directory(key)) - loaded_oe = openephys.OpenEphys(sess_dir) + session_dir = find_full_path(get_ephys_root_data_dir(), + get_session_directory(key)) + loaded_oe = openephys.OpenEphys(session_dir) oe_probe = loaded_oe.probes[probe_sn] lfp_channel_ind = np.arange( @@ -616,8 +619,9 @@ def yield_unit_waveforms(): spikeglx_meta_filepath = get_spikeglx_meta_filepath(key) neuropixels_recording = spikeglx.SpikeGLX(spikeglx_meta_filepath.parent) elif acq_software == 'Open Ephys': - sess_dir = 
pathlib.Path(get_session_directory(key)) - openephys_dataset = openephys.OpenEphys(sess_dir) + session_dir = find_full_path(get_ephys_root_data_dir(), + get_session_directory(key)) + openephys_dataset = openephys.OpenEphys(session_dir) neuropixels_recording = openephys_dataset.probes[probe_serial_number] def yield_unit_waveforms(): @@ -662,11 +666,13 @@ def get_spikeglx_meta_filepath(ephys_recording_key): except FileNotFoundError: # if not found, search in session_dir again if not spikeglx_meta_filepath.exists(): - sess_dir = pathlib.Path(get_session_directory(ephys_recording_key)) + session_dir = find_full_path(get_ephys_root_data_dir(), + get_session_directory( + ephys_recording_key)) inserted_probe_serial_number = (ProbeInsertion * probe.Probe & ephys_recording_key).fetch1('probe') - spikeglx_meta_filepaths = [fp for fp in sess_dir.rglob('*.ap.meta')] + spikeglx_meta_filepaths = [fp for fp in session_dir.rglob('*.ap.meta')] for meta_filepath in spikeglx_meta_filepaths: spikeglx_meta = spikeglx.SpikeGLXMeta(meta_filepath) if str(spikeglx_meta.probe_SN) == inserted_probe_serial_number: @@ -674,7 +680,8 @@ def get_spikeglx_meta_filepath(ephys_recording_key): break else: raise FileNotFoundError( - 'No SpikeGLX data found for probe insertion: {}'.format(ephys_recording_key)) + 'No SpikeGLX data found for probe insertion: {}'.format( + ephys_recording_key)) return spikeglx_meta_filepath @@ -699,8 +706,9 @@ def get_neuropixels_channel2electrode_map(ephys_recording_key, acq_software): for recorded_site, (shank, shank_col, shank_row, _) in enumerate( spikeglx_meta.shankmap['data'])} elif acq_software == 'Open Ephys': - sess_dir = pathlib.Path(get_session_directory(ephys_recording_key)) - openephys_dataset = openephys.OpenEphys(sess_dir) + session_dir = find_full_path(get_ephys_root_data_dir(), + get_session_directory(ephys_recording_key)) + openephys_dataset = openephys.OpenEphys(session_dir) probe_serial_number = (ProbeInsertion & ephys_recording_key).fetch1('probe') probe_dataset = openephys_dataset.probes[probe_serial_number] diff --git a/element_array_ephys/readers/openephys.py b/element_array_ephys/readers/openephys.py index 737e710c..7e8b240d 100644 --- a/element_array_ephys/readers/openephys.py +++ b/element_array_ephys/readers/openephys.py @@ -31,13 +31,13 @@ class OpenEphys: def __init__(self, experiment_dir): - self.sess_dir = pathlib.Path(experiment_dir) + self.session_dir = pathlib.Path(experiment_dir) - openephys_file = pyopenephys.File(self.sess_dir.parent) # this is on the Record Node level + openephys_file = pyopenephys.File(self.session_dir.parent) # this is on the Record Node level # extract the "recordings" for this session self.experiment = next(experiment for experiment in openephys_file.experiments - if pathlib.Path(experiment.absolute_foldername) == self.sess_dir) + if pathlib.Path(experiment.absolute_foldername) == self.session_dir) self.recording_time = self.experiment.datetime diff --git a/element_array_ephys/readers/spikeglx.py b/element_array_ephys/readers/spikeglx.py index ac9b2358..67569989 100644 --- a/element_array_ephys/readers/spikeglx.py +++ b/element_array_ephys/readers/spikeglx.py @@ -176,7 +176,7 @@ def __init__(self, meta_filepath): self.recording_time = datetime.strptime(self.meta.get('fileCreateTime_original', self.meta['fileCreateTime']), '%Y-%m-%dT%H:%M:%S') - self.recording_duration = self.meta['fileTimeSecs'] + self.recording_duration = self.meta.get('fileTimeSecs') # Get probe serial number - 'imProbeSN' for 3A and 'imDatPrb_sn' for 3B try: diff 
--git a/setup.py b/setup.py index eda1c8d6..fb96d4c0 100644 --- a/setup.py +++ b/setup.py @@ -19,8 +19,8 @@ description="DataJoint Element for Extracellular Array Electrophysiology", long_description=long_description, long_description_content_type='text/markdown', - author='DataJoint NEURO', - author_email='info@vathes.com', + author='DataJoint', + author_email='info@datajoint.com', license='MIT', url=f'https://github.com/datajoint/{pkg_name.replace("_", "-")}', keywords='neuroscience electrophysiology science datajoint',
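A note on the path-handling change that runs through this diff: the helpers `find_full_path` and `find_root_directory`, previously defined in `element_array_ephys/__init__.py`, are now imported from `element_interface.utils`, and each `pathlib.Path(get_session_directory(key))` call is replaced with `find_full_path(get_ephys_root_data_dir(), get_session_directory(key))`. Below is a small self-contained sketch of how these two helpers behave, assuming the `element-interface` versions keep the same signatures as the removed local ones; the directories are throwaway examples created only so the snippet resolves.

```python
import pathlib
import tempfile

from element_interface.utils import find_full_path, find_root_directory

# Throwaway directories so the example resolves; in practice these come from
# get_ephys_root_data_dir() and get_session_directory(session_key)
tmp = pathlib.Path(tempfile.mkdtemp())
roots = [tmp / 'lab_data', tmp / 'other_mount']      # candidate root directories
relative_session_dir = 'subject1/session0'           # path stored relative to a root
(roots[0] / relative_session_dir).mkdir(parents=True)

# find_full_path tries each root in order and returns the first existing full path
session_dir = find_full_path(roots, relative_session_dir)

# find_root_directory is the inverse: given a full path, return the root that
# contains it, which the EphysFile table uses to store paths relative to that root
root_dir = find_root_directory(roots, session_dir)
print(session_dir.relative_to(root_dir).as_posix())   # subject1/session0
```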