diff --git a/notebooks/00-data-download-optional.ipynb b/notebooks/00-data-download-optional.ipynb index c21cea39..a73d3c3c 100644 --- a/notebooks/00-data-download-optional.ipynb +++ b/notebooks/00-data-download-optional.ipynb @@ -27,6 +27,7 @@ "metadata": {}, "outputs": [], "source": [ + "%%bash\n", "pip install git+https://github.com/datajoint/djarchive-client.git" ] }, @@ -183,7 +184,7 @@ ], "metadata": { "jupytext": { - "formats": "ipynb,py" + "formats": "ipynb,py_scripts//py" }, "kernelspec": { "display_name": "ephys_workflow_runner", diff --git a/notebooks/py_scripts/00-data-download-optional.py b/notebooks/py_scripts/00-data-download-optional.py index 253d889b..221e4e88 100644 --- a/notebooks/py_scripts/00-data-download-optional.py +++ b/notebooks/py_scripts/00-data-download-optional.py @@ -1,12 +1,12 @@ # --- # jupyter: # jupytext: -# formats: ipynb,py +# formats: ipynb,py_scripts//py # text_representation: # extension: .py # format_name: light # format_version: '1.5' -# jupytext_version: 1.13.7 +# jupytext_version: 1.14.0 # kernelspec: # display_name: ephys_workflow_runner # language: python @@ -19,12 +19,15 @@ # The example dataset was hosted on djarchive, an AWS storage. We provide a client package to download the data.[djarchive-client](https://github.com/datajoint/djarchive-client), which could be installed with pip: -pip install git+https://github.com/datajoint/djarchive-client.git +# + language="bash" +# pip install git+https://github.com/datajoint/djarchive-client.git +# - # ## Download ephys test datasets using `djarchive-client` import os import djarchive_client + client = djarchive_client.client() # To browse the datasets that are available in djarchive: @@ -37,17 +40,22 @@ # To download the dataset, let's prepare a root directory, for example in `/tmp`: -os.mkdir('/tmp/test_data') +os.mkdir("/tmp/test_data") # Get the dataset revision with the current version of the workflow: from workflow_array_ephys import version -revision = version.__version__.replace('.', '_') + +revision = version.__version__.replace(".", "_") revision # Then run download for a given set and the revision: -client.download('workflow-array-ephys-test-set', target_directory='/tmp/test_data', revision=revision) +client.download( + "workflow-array-ephys-test-set", + target_directory="/tmp/test_data", + revision=revision, +) # ## Directory organization # After downloading, the directory will be organized as follows: diff --git a/notebooks/py_scripts/01-configure.py b/notebooks/py_scripts/01-configure.py index a0dd0254..675eee8d 100644 --- a/notebooks/py_scripts/01-configure.py +++ b/notebooks/py_scripts/01-configure.py @@ -26,10 +26,13 @@ # - import os + # change to the upper level folder -if os.path.basename(os.getcwd())=='notebooks': os.chdir('..') -assert os.path.basename(os.getcwd())=='workflow-array-ephys', ("Please move to the " - + "workflow directory") +if os.path.basename(os.getcwd()) == "notebooks": + os.chdir("..") +assert os.path.basename(os.getcwd()) == "workflow-array-ephys", ( + "Please move to the " + "workflow directory" +) import datajoint as dj # ## Setup - Credentials @@ -37,9 +40,10 @@ # Now let's set up the host, user and password in the `dj.config` global variable import getpass -dj.config['database.host'] = '{YOUR_HOST}' -dj.config['database.user'] = '{YOUR_USERNAME}' -dj.config['database.password'] = getpass.getpass() # enter the password securily + +dj.config["database.host"] = "{YOUR_HOST}" +dj.config["database.user"] = "{YOUR_USERNAME}" +dj.config["database.password"] = 
getpass.getpass() # enter the password securily # You should be able to connect to the database at this stage. @@ -55,11 +59,11 @@ # # The prefix could be configurated as follows in `dj.config`: -dj.config['custom'] = {'database.prefix': 'neuro_'} +dj.config["custom"] = {"database.prefix": "neuro_"} # ### Root directories for raw/processed data # -# `ephys_root_data_dir` field indicates the root directory for +# `ephys_root_data_dir` field indicates the root directory for # + The **ephys raw data** from SpikeGLX or OpenEphys, including `*{.ap,lf}.{bin,meta}` # + The **clustering results** from kilosort2 (e.g. `spike_{times,clusters}.npy` # @@ -77,9 +81,9 @@ # ``` # If there is only one root path. -dj.config['custom']['ephys_root_data_dir'] = '/tmp/test_data' +dj.config["custom"]["ephys_root_data_dir"] = "/tmp/test_data" # If there are multiple possible root paths: -dj.config['custom']['ephys_root_data_dir'] = ['/tmp/test_data1', '/tmp/test_data2'] +dj.config["custom"]["ephys_root_data_dir"] = ["/tmp/test_data1", "/tmp/test_data2"] dj.config @@ -92,7 +96,7 @@ # # `element-array-ephys` offers 3 different schemas: `acute`, `chronic`, and `no-curation`. For more information about each, please visit the [electrophysiology description page](https://elements.datajoint.org/description/array_ephys/). This decision should be made before first activating the schema. Note: only `no-curation` is supported for export to NWB directly from the Element. -dj.config['custom']['ephys_mode']='no-curation' # or acute or chronic +dj.config["custom"]["ephys_mode"] = "no-curation" # or acute or chronic # ## Save configuration # diff --git a/notebooks/py_scripts/02-workflow-structure-optional.py b/notebooks/py_scripts/02-workflow-structure-optional.py index e71ccd49..507718e4 100644 --- a/notebooks/py_scripts/02-workflow-structure-optional.py +++ b/notebooks/py_scripts/02-workflow-structure-optional.py @@ -24,7 +24,9 @@ # To load the local configuration, we will change the directory to the package root. import os -if os.path.basename(os.getcwd())=='notebooks': os.chdir('..') + +if os.path.basename(os.getcwd()) == "notebooks": + os.chdir("..") # ## Schemas and tables @@ -57,24 +59,24 @@ # + `dj.list_schemas()`: list all schemas a user could access. dj.list_schemas() -# + `dj.Diagram()`: plot tables and dependencies. +# + `dj.Diagram()`: plot tables and dependencies. # + `dj.Diagram()`: plot tables and dependencies # plot diagram for all tables in a schema dj.Diagram(ephys) # - -# **Table tiers**: +# **Table tiers**: # -# Manual table: green box, manually inserted table, expect new entries daily, e.g. Subject, ProbeInsertion. -# Lookup table: gray box, pre inserted table, commonly used for general facts or parameters. e.g. Strain, ClusteringMethod, ClusteringParamSet. -# Imported table: blue oval, auto-processing table, the processing depends on the importing of external files. e.g. process of Clustering requires output files from kilosort2. -# Computed table: red circle, auto-processing table, the processing does not depend on files external to the database, commonly used for +# Manual table: green box, manually inserted table, expect new entries daily, e.g. Subject, ProbeInsertion. +# Lookup table: gray box, pre inserted table, commonly used for general facts or parameters. e.g. Strain, ClusteringMethod, ClusteringParamSet. +# Imported table: blue oval, auto-processing table, the processing depends on the importing of external files. e.g. process of Clustering requires output files from kilosort2. 
+# Computed table: red circle, auto-processing table, the processing does not depend on files external to the database, commonly used for # Part table: plain text, as an appendix to the master table, all the part entries of a given master entry represent a intact set of the master entry. e.g. Unit of a CuratedClustering. # -# **Dependencies**: +# **Dependencies**: # -# One-to-one primary: thick solid line, share the exact same primary key, meaning the child table inherits all the primary key fields from the parent table as its own primary key. +# One-to-one primary: thick solid line, share the exact same primary key, meaning the child table inherits all the primary key fields from the parent table as its own primary key. # One-to-many primary: thin solid line, inherit the primary key from the parent table, but have additional field(s) as part of the primary key as well # secondary dependency: dashed line, the child table inherits the primary key fields from parent table as its own secondary attribute. @@ -95,7 +97,7 @@ # + `heading`: [markdown] # # + `describe()`: show table definition with foreign key references. # - -ephys.EphysRecording.describe(); +ephys.EphysRecording.describe() # + `heading`: show attribute definitions regardless of foreign key references @@ -115,14 +117,14 @@ dj.Diagram(subject) # + [subject](https://github.com/datajoint/element-animal): contains the basic information of subject, including Strain, Line, Subject, Zygosity, and SubjectDeath information. -subject.Subject.describe(); +subject.Subject.describe() # + [`session`](https://github.com/datajoint/element-session): General information of experimental sessions. dj.Diagram(session) # + [session](https://github.com/datajoint/element-session): experimental session information -session.Session.describe(); +session.Session.describe() # + [`ephys`](https://github.com/datajoint/element-array-ephys): Neuropixel based probe and ephys information diff --git a/notebooks/py_scripts/03-process.py b/notebooks/py_scripts/03-process.py index c8ee604e..2856e0ec 100644 --- a/notebooks/py_scripts/03-process.py +++ b/notebooks/py_scripts/03-process.py @@ -16,7 +16,7 @@ # # Interatively run array ephys workflow -# This notebook walks you through the steps in detail to run the ephys workflow. +# This notebook walks you through the steps in detail to run the ephys workflow. # # + If you need a more automatic approach to run the workflow, refer to [03-automate](03-automate.ipynb) # + The workflow requires neuropixels meta file and kilosort output data. If you haven't configure the paths, refer to [01-configure](01-configure.ipynb) @@ -25,7 +25,8 @@ # Let's will change the directory to the package root to load configuration and also import relevant schemas. import os -os.chdir('..') + +os.chdir("..") import datajoint as dj from workflow_array_ephys.pipeline import lab, subject, session, probe, ephys @@ -40,7 +41,12 @@ # + probe.Probe: probe information # + ephys.ProbeInsertion: probe insertion into an animal subject during a given experimental session -dj.Diagram(subject.Subject) + dj.Diagram(session.Session) + dj.Diagram(probe.Probe) + dj.Diagram(ephys.ProbeInsertion) +( + dj.Diagram(subject.Subject) + + dj.Diagram(session.Session) + + dj.Diagram(probe.Probe) + + dj.Diagram(ephys.ProbeInsertion) +) # Our example dataset is for subject6, session1. 
@@ -50,33 +56,38 @@ # insert entries with insert1() or insert(), with all required attributes specified in a dictionary subject.Subject.insert1( - dict(subject='subject6', sex='M', subject_birth_date='2020-01-04'), - skip_duplicates=True) # skip_duplicates avoids error when inserting entries with duplicated primary keys + dict(subject="subject6", sex="M", subject_birth_date="2020-01-04"), + skip_duplicates=True, +) # skip_duplicates avoids error when inserting entries with duplicated primary keys subject.Subject() # ### Ingest Session -session.Session.describe(); +session.Session.describe() session.Session.heading -session_key = dict(subject='subject6', session_datetime='2021-01-15 11:16:38') +session_key = dict(subject="subject6", session_datetime="2021-01-15 11:16:38") session.Session.insert1(session_key, skip_duplicates=True) session.Session() # ### Ingest SessionDirectory -session.SessionDirectory.describe(); +session.SessionDirectory.describe() session.SessionDirectory.heading session.SessionDirectory.insert1( - dict(subject='subject6', session_datetime='2021-01-15 11:16:38', - session_dir='subject6/session1'), - skip_duplicates=True) + dict( + subject="subject6", + session_datetime="2021-01-15 11:16:38", + session_dir="subject6/session1", + ), + skip_duplicates=True, +) session.SessionDirectory() -# **Note**: +# **Note**: # # the `session_dir` needs to be: # + a directory **relative to** the `ephys_root_path` in the configuration file, refer to [01-configure](01-configure.ipynb) for more information. @@ -87,20 +98,25 @@ probe.Probe.heading probe.Probe.insert1( - dict(probe='17131311651', probe_type='neuropixels 1.0 - 3B'), - skip_duplicates=True) # this info could be achieve from neuropixels meta file. + dict(probe="17131311651", probe_type="neuropixels 1.0 - 3B"), skip_duplicates=True +) # this info could be achieve from neuropixels meta file. probe.Probe() # ### Ingest ProbeInsertion -ephys.ProbeInsertion.describe(); +ephys.ProbeInsertion.describe() ephys.ProbeInsertion.heading ephys.ProbeInsertion.insert1( - dict(subject='subject6', session_datetime="2021-01-15 11:16:38", - insertion_number=0, probe='17131311651'), - skip_duplicates=True) # probe, subject, session_datetime needs to follow the restrictions of foreign keys. + dict( + subject="subject6", + session_datetime="2021-01-15 11:16:38", + insertion_number=0, + probe="17131311651", + ), + skip_duplicates=True, +) # probe, subject, session_datetime needs to follow the restrictions of foreign keys. ephys.ProbeInsertion() # ## Automate this manual step @@ -121,9 +137,12 @@ # Now we are ready to populate EphysRecording, a table for entries of ephys recording in a particular session. -dj.Diagram(session.Session) + \ -(dj.Diagram(probe.ElectrodeConfig) + 1) + \ -dj.Diagram(ephys.EphysRecording) + dj.Diagram(ephys.EphysRecording.EphysFile) +( + dj.Diagram(session.Session) + + (dj.Diagram(probe.ElectrodeConfig) + 1) + + dj.Diagram(ephys.EphysRecording) + + dj.Diagram(ephys.EphysRecording.EphysFile) +) # # +1 means plotting 1 more layer of the child tables # The first argument specify a particular session to populate @@ -131,7 +150,7 @@ # Populate EphysRecording extracts the following information from .ap.meta file from SpikeGLX: # -# 1. **probe.EelectrodeConfig**: this procedure detects new ElectrodeConfig, i.e. which 384 electrodes out of the total 960 on the probe were used in this ephys session, and save the results into the table `probe.EelectrodeConfig`. 
Each entry in table `ephys.EphysRecording` specifies which ElectrodeConfig is used in a particular ephys session. +# 1. **probe.EelectrodeConfig**: this procedure detects new ElectrodeConfig, i.e. which 384 electrodes out of the total 960 on the probe were used in this ephys session, and save the results into the table `probe.EelectrodeConfig`. Each entry in table `ephys.EphysRecording` specifies which ElectrodeConfig is used in a particular ephys session. # For this ephys session we just populated, Electrodes 0-383 was used. @@ -151,14 +170,18 @@ # ## Create ClusteringTask and run/validate Clustering -dj.Diagram(ephys.EphysRecording) + ephys.ClusteringParamSet + ephys.ClusteringTask + \ -ephys.Clustering +( + dj.Diagram(ephys.EphysRecording) + + ephys.ClusteringParamSet + + ephys.ClusteringTask + + ephys.Clustering +) # The next major table in the ephys pipeline is the `ClusteringTask`. # -# + An entry in `ClusteringTask` indicates a set of clustering results generated from Kilosort2 outside `workflow-array-ephys` are ready be ingested. In a future release, an entry in `ClusteringTask` can also indicate a new Kilosort2 clustering job is ready to be triggered. +# + An entry in `ClusteringTask` indicates a set of clustering results generated from Kilosort2 outside `workflow-array-ephys` are ready be ingested. In a future release, an entry in `ClusteringTask` can also indicate a new Kilosort2 clustering job is ready to be triggered. # -# + The `ClusteringTask` table depends on the table `ClusteringParamSet`, which are the parameters of the clustering task and needed to be ingested first. +# + The `ClusteringTask` table depends on the table `ClusteringParamSet`, which are the parameters of the clustering task and needed to be ingested first. # A method of the class `ClusteringParamSet` called `insert_new_params` helps on the insertion of a parameter set and ensures the inserted one is not duplicated with existing parameter sets in the database. # @@ -187,26 +210,32 @@ "nSkipCov": 25, "scaleproc": 200, "nPCs": 3, - "useRAM": 0 + "useRAM": 0, } ephys.ClusteringParamSet.insert_new_params( - processing_method='kilosort2', + processing_method="kilosort2", paramset_idx=0, params=params_ks, - paramset_desc='Spike sorting using Kilosort2') + paramset_desc="Spike sorting using Kilosort2", +) ephys.ClusteringParamSet() -# We are then able to insert an entry into the `ClusteringTask` table. One important field of the table is `clustering_output_dir`, which specifies the Kilosort2 output directory for the later processing. +# We are then able to insert an entry into the `ClusteringTask` table. One important field of the table is `clustering_output_dir`, which specifies the Kilosort2 output directory for the later processing. # **Note**: this output dir is a relative path to be combined with `ephys_root_directory` in the config file. -ephys.ClusteringTask.describe(); +ephys.ClusteringTask.describe() ephys.ClusteringTask.heading ephys.ClusteringTask.insert1( - dict(session_key, insertion_number=0, paramset_idx=0, - clustering_output_dir='subject6/session1/towersTask_g0_imec0'), - skip_duplicates=True) + dict( + session_key, + insertion_number=0, + paramset_idx=0, + clustering_output_dir="subject6/session1/towersTask_g0_imec0", + ), + skip_duplicates=True, +) ephys.ClusteringTask() & session_key @@ -220,21 +249,30 @@ # We are now ready to ingest the clustering results (spike times etc.) into the database. These clustering results are either directly from Kilosort2 or with manual curation. 
Both ways share the same format of files. In the element, there is a `Curation` table that saves this information. -dj.Diagram(ephys.ClusteringTask) + dj.Diagram(ephys.Clustering) + dj.Diagram(ephys.Curation) + \ -dj.Diagram(ephys.CuratedClustering) + dj.Diagram(ephys.CuratedClustering.Unit) +( + dj.Diagram(ephys.ClusteringTask) + + dj.Diagram(ephys.Clustering) + + dj.Diagram(ephys.Curation) + + dj.Diagram(ephys.CuratedClustering) + + dj.Diagram(ephys.CuratedClustering.Unit) +) -ephys.Curation.describe(); +ephys.Curation.describe() ephys.Curation.heading ephys.Curation.insert1( - dict(session_key, insertion_number=0, paramset_idx=0, - curation_id=1, - curation_time='2021-04-28 15:47:01', - curation_output_dir='subject6/session1/towersTask_g0_imec0', - quality_control=0, - manual_curation=0 - )) + dict( + session_key, + insertion_number=0, + paramset_idx=0, + curation_id=1, + curation_time="2021-04-28 15:47:01", + curation_output_dir="subject6/session1/towersTask_g0_imec0", + quality_control=0, + manual_curation=0, + ) +) # In this case, the curation results are directly from Kilosort2 outputs, so the `curation_output_dir` is identical to `clustering_output_dir` in the table `ephys.ClusteringTask`. The `element-array-ephys` provides a helper function `ephys.Curation().create1_from_clustering_task` to conveniently insert an entry without manual curation. # @@ -259,7 +297,11 @@ # # + `LFP`: Mean local field potential across different electrodes. # # + `LFP.Electrode`: Local field potential of a given electrode. # + LFP and LFP.Electrode: By populating LFP, LFP of every other 9 electrode on the probe will be saved into table `ephys_element.LFP.Electrode` and an average LFP saved into table `ephys_element.LFP` -dj.Diagram(ephys.EphysRecording) + dj.Diagram(ephys.LFP) + dj.Diagram(ephys.LFP.Electrode) +( + dj.Diagram(ephys.EphysRecording) + + dj.Diagram(ephys.LFP) + + dj.Diagram(ephys.LFP.Electrode) +) # - # Takes a few minutes to populate @@ -290,9 +332,7 @@ # ## Summary and next step -# This notebook walks through the detailed steps running the workflow. +# This notebook walks through the detailed steps running the workflow. # # + For an more automated way running the workflow, refer to [04-automate](04-automate-optional.ipynb) # + In the next notebook [05-explore](05-explore.ipynb), we will introduce DataJoint methods to explore and visualize the ingested data. 
- - diff --git a/notebooks/py_scripts/04-automate-optional.py b/notebooks/py_scripts/04-automate-optional.py index 8615f616..f62b2207 100644 --- a/notebooks/py_scripts/04-automate-optional.py +++ b/notebooks/py_scripts/04-automate-optional.py @@ -20,7 +20,8 @@ # - import os -os.chdir('..') + +os.chdir("..") import numpy as np from workflow_array_ephys.pipeline import lab, subject, session, probe, ephys @@ -66,13 +67,14 @@ "nSkipCov": 25, "scaleproc": 200, "nPCs": 3, - "useRAM": 0 + "useRAM": 0, } ephys.ClusteringParamSet.insert_new_params( - clustering_method='kilosort2', + clustering_method="kilosort2", paramset_idx=0, params=params_ks, - paramset_desc='Spike sorting using Kilosort2') + paramset_desc="Spike sorting using Kilosort2", +) # - # ## Trigger autoprocessing of the remaining ephys pipeline @@ -90,17 +92,23 @@ # + the `paramset_idx` used for the clustering job # + the output directory storing the clustering results -session_key = session.Session.fetch1('KEY') +session_key = session.Session.fetch1("KEY") ephys.ClusteringTask.insert1( - dict(session_key, insertion_number=0, paramset_idx=0, - clustering_output_dir='subject6/session1/towersTask_g0_imec0'), skip_duplicates=True) + dict( + session_key, + insertion_number=0, + paramset_idx=0, + clustering_output_dir="subject6/session1/towersTask_g0_imec0", + ), + skip_duplicates=True, +) # run populate again for table Clustering process.run() # ## Insert new Curation to trigger ingestion of curated results -key = (ephys.ClusteringTask & session_key).fetch1('KEY') +key = (ephys.ClusteringTask & session_key).fetch1("KEY") ephys.Curation().create1_from_clustering_task(key) # run populate for the rest of the tables in the workflow, takes a while diff --git a/notebooks/py_scripts/05-explore.py b/notebooks/py_scripts/05-explore.py index a6054b97..90483c03 100644 --- a/notebooks/py_scripts/05-explore.py +++ b/notebooks/py_scripts/05-explore.py @@ -18,7 +18,8 @@ # This notebook will describe the steps for interacting with the data ingested into `workflow-array-ephys`. import os -os.chdir('..') + +os.chdir("..") # + import datajoint as dj @@ -36,7 +37,7 @@ # # + [element-session](https://github.com/datajoint/element-session) # # + [element-array-ephys](https://github.com/datajoint/element-array-ephys) # -# For the architecture and detailed descriptions for each of those elements, please visit the respective links. +# For the architecture and detailed descriptions for each of those elements, please visit the respective links. # # Below is the diagram describing the core components of the fully assembled pipeline. # @@ -44,7 +45,7 @@ dj.Diagram(ephys) + (dj.Diagram(session.Session) + 1) - 1 -# ## Browsing the data with DataJoint query and fetch +# ## Browsing the data with DataJoint query and fetch # # # DataJoint provides abundant functions to query data and fetch. For a detailed tutorials, visit our [general tutorial site](https://playground.datajoint.io/) @@ -58,7 +59,9 @@ session.Session() -session_key = (session.Session & 'subject="subject6"' & 'session_datetime = "2021-01-15 11:16:38"').fetch1('KEY') +session_key = ( + session.Session & 'subject="subject6"' & 'session_datetime = "2021-01-15 11:16:38"' +).fetch1("KEY") # ### `ephys.ProbeInsertion` and `ephys.EphysRecording` tables # @@ -72,8 +75,8 @@ # # + Spike-sorting is performed on a per-probe basis with the details stored in `ClusteringTask` and `Clustering` # -# + After the spike sorting, the results may go through curation process. 
-# + If it did not go through curation, a copy of `ClusteringTask` entry was inserted into table `ephys.Curation` with the `curation_ouput_dir` identicial to the `clustering_output_dir`. +# + After the spike sorting, the results may go through curation process. +# + If it did not go through curation, a copy of `ClusteringTask` entry was inserted into table `ephys.Curation` with the `curation_ouput_dir` identicial to the `clustering_output_dir`. # + If it did go through a curation, a new entry will be inserted into `ephys.Curation`, with a `curation_output_dir` specified. # + `ephys.Curation` supports multiple curations of a clustering task. @@ -89,7 +92,9 @@ # Let's pick one probe insertion and one `curation_id`, and further inspect the clustering results. -curation_key = (ephys.CuratedClustering & session_key & 'insertion_number = 0' & 'curation_id=1').fetch1('KEY') +curation_key = ( + ephys.CuratedClustering & session_key & "insertion_number = 0" & "curation_id=1" +).fetch1("KEY") ephys.CuratedClustering.Unit & curation_key @@ -99,39 +104,44 @@ ephys.CuratedClustering.Unit & curation_key & 'cluster_quality_label = "good"' -units, unit_spiketimes = (ephys.CuratedClustering.Unit - & curation_key - & 'cluster_quality_label = "good"').fetch('unit', 'spike_times') +units, unit_spiketimes = ( + ephys.CuratedClustering.Unit & curation_key & 'cluster_quality_label = "good"' +).fetch("unit", "spike_times") x = np.hstack(unit_spiketimes) y = np.hstack([np.full_like(s, u) for u, s in zip(units, unit_spiketimes)]) fig, ax = plt.subplots(1, 1, figsize=(32, 16)) -ax.plot(x, y, '|') -ax.set_xlabel('Time (s)'); -ax.set_ylabel('Unit'); +ax.plot(x, y, "|") +ax.set_xlabel("Time (s)") +ax.set_ylabel("Unit") # ### Plot waveform of a unit # Let's pick one unit and further inspect -unit_key = (ephys.CuratedClustering.Unit & curation_key & 'unit = 15').fetch1('KEY') +unit_key = (ephys.CuratedClustering.Unit & curation_key & "unit = 15").fetch1("KEY") ephys.CuratedClustering.Unit * ephys.WaveformSet.Waveform & unit_key -unit_data = (ephys.CuratedClustering.Unit * ephys.WaveformSet.PeakWaveform & unit_key).fetch1() +unit_data = ( + ephys.CuratedClustering.Unit * ephys.WaveformSet.PeakWaveform & unit_key +).fetch1() unit_data -sampling_rate = (ephys.EphysRecording & curation_key).fetch1('sampling_rate')/1000 # in kHz -plt.plot(np.r_[:unit_data['peak_electrode_waveform'].size] * 1/sampling_rate, unit_data['peak_electrode_waveform']) -plt.xlabel('Time (ms)'); -plt.ylabel(r'Voltage ($\mu$V)'); +sampling_rate = (ephys.EphysRecording & curation_key).fetch1( + "sampling_rate" +) / 1000 # in kHz +plt.plot( + np.r_[: unit_data["peak_electrode_waveform"].size] * 1 / sampling_rate, + unit_data["peak_electrode_waveform"], +) +plt.xlabel("Time (ms)") +plt.ylabel(r"Voltage ($\mu$V)") # ## Summary and Next Step -# This notebook highlights the major tables in the workflow and visualize some of the ingested results. +# This notebook highlights the major tables in the workflow and visualize some of the ingested results. # # The next notebook [06-drop](06-drop-optional.ipynb) shows how to drop schemas and tables if needed. - - diff --git a/notebooks/py_scripts/06-drop-optional.py b/notebooks/py_scripts/06-drop-optional.py index 239cf70e..36f254ed 100644 --- a/notebooks/py_scripts/06-drop-optional.py +++ b/notebooks/py_scripts/06-drop-optional.py @@ -15,14 +15,15 @@ # # Drop schemas # -# + Schemas are not typically dropped in a production workflow with real data in it. 
+# + Schemas are not typically dropped in a production workflow with real data in it. # + At the developmental phase, it might be required for the table redesign. # + When dropping all schemas is needed, the following is the dependency order. # Change into the parent directory to find the `dj_local_conf.json` file. import os -os.chdir('..') + +os.chdir("..") # + from workflow_array_ephys.pipeline import * diff --git a/notebooks/py_scripts/07-downstream-analysis.py b/notebooks/py_scripts/07-downstream-analysis.py index 0281755c..37aa3034 100644 --- a/notebooks/py_scripts/07-downstream-analysis.py +++ b/notebooks/py_scripts/07-downstream-analysis.py @@ -23,12 +23,17 @@ # First, let's change directories to find the `dj_local_conf` file. import os + # change to the upper level folder to detect dj_local_conf.json -if os.path.basename(os.getcwd())=='notebooks': os.chdir('..') -assert os.path.basename(os.getcwd())=='workflow-array-ephys', ("Please move to the " - + "workflow directory") +if os.path.basename(os.getcwd()) == "notebooks": + os.chdir("..") +assert os.path.basename(os.getcwd()) == "workflow-array-ephys", ( + "Please move to the " + "workflow directory" +) # We'll be working with long tables, so we'll make visualization easier with a limit -import datajoint as dj; dj.config['display.limit']=10 +import datajoint as dj + +dj.config["display.limit"] = 10 # Next, we populate the python namespace with the required schemas @@ -39,9 +44,9 @@ # ## Trial and Event schemas # - -# Tables in the `trial` and `event` schemas specify the structure of your experiment, including block, trial and event timing. +# Tables in the `trial` and `event` schemas specify the structure of your experiment, including block, trial and event timing. # - Session has a 1-to-1 mapping with a behavior recording -# - A block is a continuous phase of an experiment that contains repeated instances of a condition, or trials. +# - A block is a continuous phase of an experiment that contains repeated instances of a condition, or trials. # - Events may occur within or outside of conditions, either instantaneous or continuous. # # The diagram below shows (a) the levels of hierarchy and (b) how the bounds may not completely overlap. A block may not fully capture trials and events may occur outside both blocks/trials. @@ -58,10 +63,16 @@ # Let's load some example data. The `ingest.py` script has a series of loaders to help. -from workflow_array_ephys.ingest import ingest_subjects, ingest_sessions,\ - ingest_events, ingest_alignment +from workflow_array_ephys.ingest import ( + ingest_subjects, + ingest_sessions, + ingest_events, + ingest_alignment, +) -ingest_subjects(); ingest_sessions(); ingest_events() +ingest_subjects() +ingest_sessions() +ingest_events() # We have 100 total trials, either 'stim' or 'ctrl', with start and stop time @@ -69,7 +80,7 @@ # Each trial is paired with one or more events that take place during the trial window. -trial.TrialEvent() & 'trial_id<5' +trial.TrialEvent() & "trial_id<5" # Finally, the `AlignmentEvent` describes the event of interest and the window we'd like to see around it. @@ -87,10 +98,14 @@ # For this example, we'll be looking at `subject6`. 
-clustering_key = (ephys.CuratedClustering - & {'subject': 'subject6', 'session_datetime': '2021-01-15 11:16:38', - 'insertion_number': 0} - ).fetch1('KEY') +clustering_key = ( + ephys.CuratedClustering + & { + "subject": "subject6", + "session_datetime": "2021-01-15 11:16:38", + "insertion_number": 0, + } +).fetch1("KEY") trial.Trial & clustering_key @@ -100,23 +115,32 @@ # The `analysis` schema provides example tables to perform event-aligned spike-times analysis. -(dj.Diagram(analysis) + dj.Diagram(event.AlignmentEvent) + dj.Diagram(trial.Trial) + - dj.Diagram(ephys.CuratedClustering)) +( + dj.Diagram(analysis) + + dj.Diagram(event.AlignmentEvent) + + dj.Diagram(trial.Trial) + + dj.Diagram(ephys.CuratedClustering) +) # + ***SpikesAlignmentCondition*** - a manual table to specify the inputs and condition for the analysis [markdown] # Let's start by creating an analysis configuration - i.e. inserting into ***SpikesAlignmentCondition*** for the `center` event, called `center_button` in the `AlignmentEvent` table. # + ***SpikesAlignment*** - a computed table to extract event-aligned spikes and compute unit PSTH -alignment_key = (event.AlignmentEvent & 'alignment_name = "center_button"' - ).fetch1('KEY') -alignment_condition = {**clustering_key, **alignment_key, - 'trial_condition': 'ctrl_center_button', - 'bin_size':.2} +alignment_key = (event.AlignmentEvent & 'alignment_name = "center_button"').fetch1( + "KEY" +) +alignment_condition = { + **clustering_key, + **alignment_key, + "trial_condition": "ctrl_center_button", + "bin_size": 0.2, +} analysis.SpikesAlignmentCondition.insert1(alignment_condition, skip_duplicates=True) -alignment_condition.pop('bin_size') +alignment_condition.pop("bin_size") analysis.SpikesAlignmentCondition.Trial.insert( (analysis.SpikesAlignmentCondition * ctrl_trials & alignment_condition).proj(), - skip_duplicates=True) + skip_duplicates=True, +) # + a CuratedClustering of interest for analysis [markdown] # With the steps above, we have created a new spike alignment condition for analysis, named `ctrl_center_button`, which retains all spiking information related to control trials during which the center button was pressed. @@ -126,14 +150,18 @@ # Now, let's create another set for the stimulus condition. # + a set of trials of interest to perform the analysis on - `stim` trials stim_trials = trial.Trial & clustering_key & 'trial_type = "stim"' -alignment_condition = {**clustering_key, **alignment_key, - 'trial_condition': 'stim_center_button', - 'bin_size':.2} +alignment_condition = { + **clustering_key, + **alignment_key, + "trial_condition": "stim_center_button", + "bin_size": 0.2, +} analysis.SpikesAlignmentCondition.insert1(alignment_condition, skip_duplicates=True) -alignment_condition.pop('bin_size') +alignment_condition.pop("bin_size") analysis.SpikesAlignmentCondition.Trial.insert( (analysis.SpikesAlignmentCondition * stim_trials & alignment_condition).proj(), - skip_duplicates=True) + skip_duplicates=True, +) # + a set of trials of interest to perform the analysis on - `stim` trials [markdown] # We can compare conditions in the `SpikesAlignmentCondition` table. @@ -158,19 +186,31 @@ # We can visualize the results with the `plot` function with our keys. 
# - -clustering_key = (ephys.CuratedClustering - & {'subject': 'subject6', 'session_datetime': '2021-01-15 11:16:38', - 'insertion_number': 0} - ).fetch1('KEY') -alignment_key = (event.AlignmentEvent & 'alignment_name = "center_button"' - ).fetch1('KEY') +clustering_key = ( + ephys.CuratedClustering + & { + "subject": "subject6", + "session_datetime": "2021-01-15 11:16:38", + "insertion_number": 0, + } +).fetch1("KEY") +alignment_key = (event.AlignmentEvent & 'alignment_name = "center_button"').fetch1( + "KEY" +) # + a set of trials of interest to perform the analysis on - `stim` trials -alignment_condition = {**clustering_key, **alignment_key, 'trial_condition': 'ctrl_center_button'} -analysis.SpikesAlignment().plot(alignment_condition, unit=2); +alignment_condition = { + **clustering_key, + **alignment_key, + "trial_condition": "ctrl_center_button", +} +analysis.SpikesAlignment().plot(alignment_condition, unit=2) # + a set of trials of interest to perform the analysis on - `stim` trials -alignment_condition = {**clustering_key, **alignment_key, 'trial_condition': 'stim_center_button'} -analysis.SpikesAlignment().plot(alignment_condition, unit=2); +alignment_condition = { + **clustering_key, + **alignment_key, + "trial_condition": "stim_center_button", +} +analysis.SpikesAlignment().plot(alignment_condition, unit=2) # - - diff --git a/notebooks/py_scripts/08-electrode-localization.py b/notebooks/py_scripts/08-electrode-localization.py index 05d575cc..81e9be02 100644 --- a/notebooks/py_scripts/08-electrode-localization.py +++ b/notebooks/py_scripts/08-electrode-localization.py @@ -18,12 +18,17 @@ # Change into the parent directory to find the `dj_local_conf.json` file. import os + # change to the upper level folder to detect dj_local_conf.json -if os.path.basename(os.getcwd())=='notebooks': os.chdir('..') -assert os.path.basename(os.getcwd())=='workflow-array-ephys', ("Please move to the " - + "workflow directory") +if os.path.basename(os.getcwd()) == "notebooks": + os.chdir("..") +assert os.path.basename(os.getcwd()) == "workflow-array-ephys", ( + "Please move to the " + "workflow directory" +) # We'll be working with long tables, so we'll make visualization easier with a limit -import datajoint as dj; dj.config['display.limit']=10 +import datajoint as dj + +dj.config["display.limit"] = 10 # + [markdown] tags=[] jp-MarkdownHeadingCollapsed=true jp-MarkdownHeadingCollapsed=true tags=[] # ## Coordinate Framework @@ -47,20 +52,22 @@ # The acronyms listed in the DataJoint table differ slightly from the CCF standard by substituting case-sensitive differences with [snake case](https://en.wikipedia.org/wiki/Snake_case). To lookup the snake case equivalent, use the `retrieve_acronym` function. -central_thalamus = ccf.BrainRegionAnnotation.retrieve_acronym('CM') -cranial_nerves = ccf.BrainRegionAnnotation.retrieve_acronym('cm') -print(f'CM: {central_thalamus}\ncm: {cranial_nerves}') +central_thalamus = ccf.BrainRegionAnnotation.retrieve_acronym("CM") +cranial_nerves = ccf.BrainRegionAnnotation.retrieve_acronym("cm") +print(f"CM: {central_thalamus}\ncm: {cranial_nerves}") # If your work requires the case-sensitive columns please get in touch with the DataJoint team via [StackOverflow](https://stackoverflow.com/questions/tagged/datajoint). # # For this demo, let's look at the dimensions of the central thalamus. To look at other regions, open the CSV you downloaded and search for your desired region. 
-cm_voxels = ccf.BrainRegionAnnotation.Voxel() & f'acronym=\"{central_thalamus}\"' +cm_voxels = ccf.BrainRegionAnnotation.Voxel() & f'acronym="{central_thalamus}"' cm_voxels -cm_x, cm_y, cm_z = cm_voxels.fetch('x', 'y', 'z') -print(f'The central thalamus extends from \n\tx = {min(cm_x)} to x = {max(cm_x)}\n\t' - + f'y = {min(cm_y)} to y = {max(cm_y)}\n\tz = {min(cm_z)} to z = {max(cm_z)}') +cm_x, cm_y, cm_z = cm_voxels.fetch("x", "y", "z") +print( + f"The central thalamus extends from \n\tx = {min(cm_x)} to x = {max(cm_x)}\n\t" + + f"y = {min(cm_y)} to y = {max(cm_y)}\n\tz = {min(cm_z)} to z = {max(cm_z)}" +) # ## Electrode Localization @@ -74,7 +81,8 @@ # Because the probe may not be fully inserted, there will be some electrode positions that occur outside the brain. We register these instances with an `IntegrityError` warning because we're trying to register a coordinate position with no corresponding location in the `ccf.CCF.Voxel` table. We can silence these warnings by setting the log level before running `populate()` on the `ElectrodePosition` table. import logging -logging.getLogger().setLevel(logging.ERROR) # or logging.INFO + +logging.getLogger().setLevel(logging.ERROR) # or logging.INFO eloc.ElectrodePosition.populate() @@ -85,23 +93,25 @@ # Let's focus on `subject5`, insertion `1`. from workflow_array_ephys.pipeline import ephys -key=(ephys.EphysRecording & 'subject="subject5"' & 'insertion_number=1').fetch1('KEY') + +key = (ephys.EphysRecording & 'subject="subject5"' & "insertion_number=1").fetch1("KEY") len(eloc.ElectrodePosition.Electrode & key) # With a resolution of 100μm, adjacent electrodes will very likely be in the same region. Let's look at every 38th electrode to sample 10 across the probe. # # If you're interested in more electrodes, decrease the number next to the `%` modulo operator. -electrode_coordinates = (eloc.ElectrodePosition.Electrode & 'electrode%38=0' - & key).fetch('electrode', 'x', 'y', 'z', as_dict=True) +electrode_coordinates = ( + eloc.ElectrodePosition.Electrode & "electrode%38=0" & key +).fetch("electrode", "x", "y", "z", as_dict=True) for e in electrode_coordinates: - x, y, z = [ e[k] for k in ('x','y', 'z')] - acronym = (ccf.BrainRegionAnnotation.Voxel & f'x={x}' & f'y={y}' & f'z={z}' - ).fetch1('acronym') - e['region'] = (ccf.BrainRegionAnnotation.BrainRegion & f'acronym=\"{acronym}\"' - ).fetch1('region_name') - print('Electrode {electrode} (x={x}, y={y}, z={z}) is in {region}'.format(**e)) + x, y, z = [e[k] for k in ("x", "y", "z")] + acronym = (ccf.BrainRegionAnnotation.Voxel & f"x={x}" & f"y={y}" & f"z={z}").fetch1( + "acronym" + ) + e["region"] = ( + ccf.BrainRegionAnnotation.BrainRegion & f'acronym="{acronym}"' + ).fetch1("region_name") + print("Electrode {electrode} (x={x}, y={y}, z={z}) is in {region}".format(**e)) eloc.ElectrodePosition.Electrode() - - diff --git a/notebooks/py_scripts/09-NWB-export.py b/notebooks/py_scripts/09-NWB-export.py index cb8604b9..8d74b7af 100644 --- a/notebooks/py_scripts/09-NWB-export.py +++ b/notebooks/py_scripts/09-NWB-export.py @@ -21,30 +21,38 @@ # First, let's change directories to find the `dj_local_conf` file. 
import os + # change to the upper level folder to detect dj_local_conf.json -if os.path.basename(os.getcwd())=='notebooks': os.chdir('..') -assert os.path.basename(os.getcwd())=='workflow-array-ephys', ("Please move to the " - + "workflow directory") +if os.path.basename(os.getcwd()) == "notebooks": + os.chdir("..") +assert os.path.basename(os.getcwd()) == "workflow-array-ephys", ( + "Please move to the " + "workflow directory" +) # We'll be working with long tables, so we'll make visualization easier with a limit -import datajoint as dj; dj.config['display.limit']=10 +import datajoint as dj + +dj.config["display.limit"] = 10 -# If you haven't already populated the `lab`, `subject`, `session`, `probe`, and `ephys` schemas, please do so now with [04-automate](./04-automate-optional.ipynb). Note: exporting `ephys` data is currently only supported on the `no_curation` schema. +# If you haven't already populated the `lab`, `subject`, `session`, `probe`, and `ephys` schemas, please do so now with [04-automate](./04-automate-optional.ipynb). Note: exporting `ephys` data is currently only supported on the `no_curation` schema. from workflow_array_ephys.pipeline import lab, subject, session, probe, ephys -from workflow_array_ephys.export import (element_lab_to_nwb_dict, subject_to_nwb, - session_to_nwb, ecephys_session_to_nwb, - write_nwb) +from workflow_array_ephys.export import ( + element_lab_to_nwb_dict, + subject_to_nwb, + session_to_nwb, + ecephys_session_to_nwb, + write_nwb, +) from element_interface.dandi import upload_to_dandi # ## Export to NWB # # We'll use the following keys to demonstrate export functions. -lab_key={"lab": "LabA"} -protocol_key={"protocol": "ProtA"} -project_key={"project": "ProjA"} -session_key={"subject": "subject5", - "session_datetime": "2018-07-03 20:32:28"} +lab_key = {"lab": "LabA"} +protocol_key = {"protocol": "ProtA"} +project_key = {"project": "ProjA"} +session_key = {"subject": "subject5", "session_datetime": "2018-07-03 20:32:28"} # ### Upstream Elements # @@ -57,12 +65,13 @@ # Note: `pynwb` will display a warning regarding timezone information - datetime fields are assumed to be in local time, and will be converted to UTC. 
# -print('Lab:\n') -element_lab_to_nwb_dict(lab_key=lab_key, protocol_key=protocol_key, - project_key=project_key) -print('\nAnimal:\n') +print("Lab:\n") +element_lab_to_nwb_dict( + lab_key=lab_key, protocol_key=protocol_key, project_key=project_key +) +print("\nAnimal:\n") subject_to_nwb(session_key=session_key) -print('\nSession:\n') +print("\nSession:\n") session_to_nwb(session_key=session_key) # ### Element Array Electrophysiology @@ -72,15 +81,17 @@ help(ecephys_session_to_nwb) -nwbfile = ecephys_session_to_nwb(session_key=session_key, - raw=True, - spikes=True, - lfp="dj", - end_frame=100, - lab_key=lab_key, - project_key=project_key, - protocol_key=protocol_key, - nwbfile_kwargs=None) +nwbfile = ecephys_session_to_nwb( + session_key=session_key, + raw=True, + spikes=True, + lfp="dj", + end_frame=100, + lab_key=lab_key, + project_key=project_key, + protocol_key=protocol_key, + nwbfile_kwargs=None, +) nwbfile @@ -88,7 +99,7 @@ # + import time - + write_nwb(nwbfile, f'./temp_nwb/{time.strftime("_test_%Y%m%d-%H%M%S.nwb")}') # - @@ -103,17 +114,16 @@ # # These values can be added to your `dj.config` as follows: -dj.config['custom']['dandiset_id']="" -dj.config['custom']['dandi.api']="<40-character alphanumeric string>" +dj.config["custom"]["dandiset_id"] = "" +dj.config["custom"]["dandi.api"] = "<40-character alphanumeric string>" # This would facilitate routine updating of your dandiset. upload_to_dandi( data_directory="./temp_nwb/", - dandiset_id=dj.config['custom']['dandiset_id'], + dandiset_id=dj.config["custom"]["dandiset_id"], staging=True, working_directory="./temp_nwb/", - api_key=dj.config['custom']['dandi.api'], - sync=False) - - + api_key=dj.config["custom"]["dandi.api"], + sync=False, +) diff --git a/setup.py b/setup.py index 8234bb8b..fe60017a 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages from os import path -pkg_name = 'workflow_array_ephys' +pkg_name = "workflow_array_ephys" here = path.abspath(path.dirname(__file__)) long_description = """" @@ -14,22 +14,22 @@ + [element-array-ephys](https://github.com/datajoint/element-array-ephys) """ -with open(path.join(here, 'requirements.txt')) as f: +with open(path.join(here, "requirements.txt")) as f: requirements = f.read().splitlines() -with open(path.join(here, pkg_name, 'version.py')) as f: +with open(path.join(here, pkg_name, "version.py")) as f: exec(f.read()) setup( - name='workflow-array-ephys', + name="workflow-array-ephys", version=__version__, description="Extracellular electrophysiology pipeline using the DataJoint elements", long_description=long_description, - author='DataJoint', - author_email='info@datajoint.com', - license='MIT', - url='https://github.com/datajoint/workflow-array-ephys', - keywords='neuroscience datajoint ephys', - packages=find_packages(exclude=['contrib', 'docs', 'tests*']), + author="DataJoint", + author_email="info@datajoint.com", + license="MIT", + url="https://github.com/datajoint/workflow-array-ephys", + keywords="neuroscience datajoint ephys", + packages=find_packages(exclude=["contrib", "docs", "tests*"]), install_requires=requirements, ) diff --git a/tests/__init__.py b/tests/__init__.py index 0b1ded47..b22e1b86 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -19,16 +19,18 @@ _tear_down = False verbose = True -pathlib.Path('./tests/user_data').mkdir(exist_ok=True) -pathlib.Path('./tests/user_data/lab').mkdir(exist_ok=True) - -sessions_dirs = ['subject1/session1', - 'subject2/session1', - 'subject2/session2', - 
'subject3/session1', - 'subject4/experiment1', - 'subject5/session1', - 'subject6/session1'] +pathlib.Path("./tests/user_data").mkdir(exist_ok=True) +pathlib.Path("./tests/user_data/lab").mkdir(exist_ok=True) + +sessions_dirs = [ + "subject1/session1", + "subject2/session1", + "subject2/session2", + "subject3/session1", + "subject4/experiment1", + "subject5/session1", + "subject6/session1", +] # -------------------- HELPER CLASS -------------------- @@ -39,38 +41,44 @@ def write_csv(content, path): :param path: pathlib PosixPath :param content: list of strings, each as row of CSV """ - with open(path, 'w') as f: + with open(path, "w") as f: for line in content: - f.write(line+'\n') + f.write(line + "\n") class QuietStdOut: """If verbose set to false, used to quiet tear_down table.delete prints""" + def __enter__(self): self._original_stdout = sys.stdout - sys.stdout = open(os.devnull, 'w') + sys.stdout = open(os.devnull, "w") def __exit__(self, exc_type, exc_val, exc_tb): sys.stdout.close() sys.stdout = self._original_stdout + # ---------------------- FIXTURES ---------------------- @pytest.fixture(autouse=True) def dj_config(): - """ If dj_local_config exists, load""" - if pathlib.Path('./dj_local_conf.json').exists(): - dj.config.load('./dj_local_conf.json') - dj.config['safemode'] = False - dj.config['custom'] = { - 'ephys_mode': (os.environ.get('EPHYS_MODE') - or dj.config['custom']['ephys_mode']), - 'database.prefix': (os.environ.get('DATABASE_PREFIX') - or dj.config['custom']['database.prefix']), - 'ephys_root_data_dir': (os.environ.get('EPHYS_ROOT_DATA_DIR').split(',') - if os.environ.get('EPHYS_ROOT_DATA_DIR') - else dj.config['custom']['ephys_root_data_dir']) + """If dj_local_config exists, load""" + if pathlib.Path("./dj_local_conf.json").exists(): + dj.config.load("./dj_local_conf.json") + dj.config["safemode"] = False + dj.config["custom"] = { + "ephys_mode": ( + os.environ.get("EPHYS_MODE") or dj.config["custom"]["ephys_mode"] + ), + "database.prefix": ( + os.environ.get("DATABASE_PREFIX") or dj.config["custom"]["database.prefix"] + ), + "ephys_root_data_dir": ( + os.environ.get("EPHYS_ROOT_DATA_DIR").split(",") + if os.environ.get("EPHYS_ROOT_DATA_DIR") + else dj.config["custom"]["ephys_root_data_dir"] + ), } return @@ -84,50 +92,62 @@ def test_data(dj_config): try: find_full_path(get_ephys_root_data_dir(), p) except FileNotFoundError: - test_data_exists = False # If data not found + test_data_exists = False # If data not found - if not test_data_exists: # attempt to djArchive dowload + if not test_data_exists: # attempt to djArchive dowload try: - dj.config['custom'].update({ - 'djarchive.client.endpoint': - os.environ['DJARCHIVE_CLIENT_ENDPOINT'], - 'djarchive.client.bucket': - os.environ['DJARCHIVE_CLIENT_BUCKET'], - 'djarchive.client.access_key': - os.environ['DJARCHIVE_CLIENT_ACCESSKEY'], - 'djarchive.client.secret_key': - os.environ['DJARCHIVE_CLIENT_SECRETKEY'] - }) + dj.config["custom"].update( + { + "djarchive.client.endpoint": os.environ[ + "DJARCHIVE_CLIENT_ENDPOINT" + ], + "djarchive.client.bucket": os.environ["DJARCHIVE_CLIENT_BUCKET"], + "djarchive.client.access_key": os.environ[ + "DJARCHIVE_CLIENT_ACCESSKEY" + ], + "djarchive.client.secret_key": os.environ[ + "DJARCHIVE_CLIENT_SECRETKEY" + ], + } + ) except KeyError as e: raise FileNotFoundError( - f' Full test data not available.' - f'\nAttempting to download from DJArchive,' - f' but no credentials found in environment variables.' - f'\nError: {str(e)}') + f" Full test data not available." 
+ f"\nAttempting to download from DJArchive," + f" but no credentials found in environment variables." + f"\nError: {str(e)}" + ) import djarchive_client + client = djarchive_client.client() test_data_dir = get_ephys_root_data_dir() if isinstance(test_data_dir, list): # if multiple root dirs, first test_data_dir = test_data_dir[0] - client.download('workflow-array-ephys-benchmark', - 'v2', - str(test_data_dir), create_target=False) + client.download( + "workflow-array-ephys-benchmark", + "v2", + str(test_data_dir), + create_target=False, + ) return @pytest.fixture def pipeline(): from workflow_array_ephys import pipeline - yield {'subject': pipeline.subject, - 'lab': pipeline.lab, - 'ephys': pipeline.ephys, - 'probe': pipeline.probe, - 'session': pipeline.session, - 'get_ephys_root_data_dir': pipeline.get_ephys_root_data_dir, - 'ephys_mode': pipeline.ephys_mode} + + yield { + "subject": pipeline.subject, + "lab": pipeline.lab, + "ephys": pipeline.ephys, + "probe": pipeline.probe, + "session": pipeline.session, + "get_ephys_root_data_dir": pipeline.get_ephys_root_data_dir, + "ephys_mode": pipeline.ephys_mode, + } if verbose and _tear_down: pipeline.subject.Subject.delete() @@ -138,18 +158,19 @@ def pipeline(): @pytest.fixture def lab_csv(): - """ Create a 'labs.csv' file""" - lab_content = ["lab,lab_name,institution,address," - + "time_zone,location,location_description", - "LabA,The Example Lab,Example Uni," - + "'221B Baker St,London NW1 6XE,UK',UTC+0," - + "Example Building,'2nd floor lab dedicated to all " - + "fictional experiments.'", - "LabB,The Other Lab,Other Uni," - + "'Oxford OX1 2JD, United Kingdom',UTC+0," - + "Other Building,'fictional campus dedicated to imaginary" - + "experiments.'"] - lab_csv_path = pathlib.Path('./tests/user_data/lab/labs.csv') + """Create a 'labs.csv' file""" + lab_content = [ + "lab,lab_name,institution,address," + "time_zone,location,location_description", + "LabA,The Example Lab,Example Uni," + + "'221B Baker St,London NW1 6XE,UK',UTC+0," + + "Example Building,'2nd floor lab dedicated to all " + + "fictional experiments.'", + "LabB,The Other Lab,Other Uni," + + "'Oxford OX1 2JD, United Kingdom',UTC+0," + + "Other Building,'fictional campus dedicated to imaginary" + + "experiments.'", + ] + lab_csv_path = pathlib.Path("./tests/user_data/lab/labs.csv") write_csv(lab_content, lab_csv_path) yield lab_content, lab_csv_path @@ -158,18 +179,19 @@ def lab_csv(): @pytest.fixture def lab_project_csv(): - """ Create a 'projects.csv' file""" - lab_project_content = ["project,project_description,repository_url," - + "repository_name,codeurl", - "ProjA,Example project to populate element-lab," - + "https://github.com/datajoint/element-lab/," - + "element-lab,https://github.com/datajoint/element" - + "-lab/tree/main/element_lab", - "ProjB,Other example project to populate element-" - + "lab,https://github.com/datajoint/element-session" - + "/,element-session,https://github.com/datajoint/" - + "element-session/tree/main/element_session"] - lab_project_csv_path = pathlib.Path('./tests/user_data/lab/projects.csv') + """Create a 'projects.csv' file""" + lab_project_content = [ + "project,project_description,repository_url," + "repository_name,codeurl", + "ProjA,Example project to populate element-lab," + + "https://github.com/datajoint/element-lab/," + + "element-lab,https://github.com/datajoint/element" + + "-lab/tree/main/element_lab", + "ProjB,Other example project to populate element-" + + "lab,https://github.com/datajoint/element-session" + + 
"/,element-session,https://github.com/datajoint/" + + "element-session/tree/main/element_session", + ] + lab_project_csv_path = pathlib.Path("./tests/user_data/lab/projects.csv") write_csv(lab_project_content, lab_project_csv_path) yield lab_project_content, lab_project_csv_path @@ -178,15 +200,19 @@ def lab_project_csv(): @pytest.fixture def lab_project_users_csv(): - """ Create a 'project_users.csv' file""" - lab_project_user_content = ["user,project", - "Sherlock,ProjA", - "Sherlock,ProjB", - "Watson,ProjB", - "Dr. Candace Pert,ProjA", - "User1,ProjA"] - lab_project_user_csv_path = pathlib.Path('./tests/user_data/lab/\ - project_users.csv') + """Create a 'project_users.csv' file""" + lab_project_user_content = [ + "user,project", + "Sherlock,ProjA", + "Sherlock,ProjB", + "Watson,ProjB", + "Dr. Candace Pert,ProjA", + "User1,ProjA", + ] + lab_project_user_csv_path = pathlib.Path( + "./tests/user_data/lab/\ + project_users.csv" + ) write_csv(lab_project_user_content, lab_project_user_csv_path) yield lab_project_user_content, lab_project_user_csv_path @@ -195,12 +221,16 @@ def lab_project_users_csv(): @pytest.fixture def lab_publications_csv(): - """ Create a 'publications.csv' file""" - lab_publication_content = ["project,publication", - "ProjA,arXiv:1807.11104", - "ProjA,arXiv:1807.11104v1"] - lab_publication_csv_path = pathlib.Path('./tests/user_data/lab/\ - publications.csv') + """Create a 'publications.csv' file""" + lab_publication_content = [ + "project,publication", + "ProjA,arXiv:1807.11104", + "ProjA,arXiv:1807.11104v1", + ] + lab_publication_csv_path = pathlib.Path( + "./tests/user_data/lab/\ + publications.csv" + ) write_csv(lab_publication_content, lab_publication_csv_path) yield lab_publication_content, lab_publication_csv_path @@ -209,12 +239,14 @@ def lab_publications_csv(): @pytest.fixture def lab_keywords_csv(): - """ Create a 'keywords.csv' file""" - lab_keyword_content = ["project,keyword", - "ProjA,Study", - "ProjA,Example", - "ProjB,Alternate"] - lab_keyword_csv_path = pathlib.Path('./tests/user_data/lab/keywords.csv') + """Create a 'keywords.csv' file""" + lab_keyword_content = [ + "project,keyword", + "ProjA,Study", + "ProjA,Example", + "ProjB,Alternate", + ] + lab_keyword_csv_path = pathlib.Path("./tests/user_data/lab/keywords.csv") write_csv(lab_keyword_content, lab_keyword_csv_path) yield lab_keyword_content, lab_keyword_csv_path @@ -223,13 +255,13 @@ def lab_keywords_csv(): @pytest.fixture def lab_protocol_csv(): - """ Create a 'protocols.csv' file""" - lab_protocol_content = ["protocol,protocol_type,protocol_description", - "ProtA,IRB expedited review,Protocol for managing " - + "data ingestion", - "ProtB,Alternative Method,Limited protocol for " - + "piloting only"] - lab_protocol_csv_path = pathlib.Path('./tests/user_data/lab/protocols.csv') + """Create a 'protocols.csv' file""" + lab_protocol_content = [ + "protocol,protocol_type,protocol_description", + "ProtA,IRB expedited review,Protocol for managing " + "data ingestion", + "ProtB,Alternative Method,Limited protocol for " + "piloting only", + ] + lab_protocol_csv_path = pathlib.Path("./tests/user_data/lab/protocols.csv") write_csv(lab_protocol_content, lab_protocol_csv_path) yield lab_protocol_content, lab_protocol_csv_path @@ -238,16 +270,16 @@ def lab_protocol_csv(): @pytest.fixture def lab_user_csv(): - """ Create a 'users.csv' file""" - lab_user_content = ["lab,user,user_role,user_email,user_cellphone", - "LabA,Sherlock,PI,Sherlock@BakerSt.com," - + "+44 20 7946 0344", - 
"LabA,Watson,Dr,DrWatson@BakerSt.com,+44 73 8389 1763", - "LabB,Dr. Candace Pert,PI,Pert@gmail.com," - + "+44 74 4046 5899", - "LabA,User1,Lab Tech,fake@email.com,+44 1632 960103", - "LabB,User2,Lab Tech,fake2@email.com,+44 1632 960102"] - lab_user_csv_path = pathlib.Path('./tests/user_data/lab/users.csv') + """Create a 'users.csv' file""" + lab_user_content = [ + "lab,user,user_role,user_email,user_cellphone", + "LabA,Sherlock,PI,Sherlock@BakerSt.com," + "+44 20 7946 0344", + "LabA,Watson,Dr,DrWatson@BakerSt.com,+44 73 8389 1763", + "LabB,Dr. Candace Pert,PI,Pert@gmail.com," + "+44 74 4046 5899", + "LabA,User1,Lab Tech,fake@email.com,+44 1632 960103", + "LabB,User2,Lab Tech,fake2@email.com,+44 1632 960102", + ] + lab_user_csv_path = pathlib.Path("./tests/user_data/lab/users.csv") write_csv(lab_user_content, lab_user_csv_path) yield lab_user_content, lab_user_csv_path @@ -255,11 +287,19 @@ def lab_user_csv(): @pytest.fixture -def ingest_lab(pipeline, lab_csv, lab_project_csv, lab_publications_csv, - lab_keywords_csv, lab_protocol_csv, lab_user_csv, - lab_project_users_csv): - """ From workflow_array_ephys ingest.py, import ingest_lab, run """ +def ingest_lab( + pipeline, + lab_csv, + lab_project_csv, + lab_publications_csv, + lab_keywords_csv, + lab_protocol_csv, + lab_user_csv, + lab_project_users_csv, +): + """From workflow_array_ephys ingest.py, import ingest_lab, run""" from workflow_array_ephys.ingest import ingest_lab + _, lab_csv_path = lab_csv _, lab_project_csv_path = lab_project_csv _, lab_publication_csv_path = lab_publications_csv @@ -267,47 +307,64 @@ def ingest_lab(pipeline, lab_csv, lab_project_csv, lab_publications_csv, _, lab_protocol_csv_path = lab_protocol_csv _, lab_user_csv_path = lab_user_csv _, lab_project_user_csv_path = lab_project_users_csv - ingest_lab(lab_csv_path=lab_csv_path, - project_csv_path=lab_project_csv_path, - publication_csv_path=lab_publication_csv_path, - keyword_csv_path=lab_keyword_csv_path, - protocol_csv_path=lab_protocol_csv_path, - users_csv_path=lab_user_csv_path, - project_user_csv_path=lab_project_user_csv_path, verbose=verbose) + ingest_lab( + lab_csv_path=lab_csv_path, + project_csv_path=lab_project_csv_path, + publication_csv_path=lab_publication_csv_path, + keyword_csv_path=lab_keyword_csv_path, + protocol_csv_path=lab_protocol_csv_path, + users_csv_path=lab_user_csv_path, + project_user_csv_path=lab_project_user_csv_path, + verbose=verbose, + ) return @pytest.fixture def subjects_csv(): - """ Create a 'subjects.csv' file""" - input_subjects = pd.DataFrame(columns=['subject', 'sex', - 'subject_birth_date', - 'subject_description']) - input_subjects.subject = ['subject1', 'subject2', - 'subject3', 'subject4', - 'subject5', 'subject6'] - input_subjects.sex = ['F', 'M', 'M', 'M', 'F', 'F'] - input_subjects.subject_birth_date = ['2020-01-01 00:00:01', - '2020-01-01 00:00:01', - '2020-01-01 00:00:01', - '2020-01-01 00:00:01', - '2020-01-01 00:00:01', - '2020-01-01 00:00:01'] - input_subjects.subject_description = ['dl56', 'SC035', 'SC038', - 'oe_talab', 'rich', 'manuel'] - input_subjects = input_subjects.set_index('subject') - - subjects_csv_path = pathlib.Path('./tests/user_data/subjects.csv') - input_subjects.to_csv(subjects_csv_path) # write csv file + """Create a 'subjects.csv' file""" + input_subjects = pd.DataFrame( + columns=["subject", "sex", "subject_birth_date", "subject_description"] + ) + input_subjects.subject = [ + "subject1", + "subject2", + "subject3", + "subject4", + "subject5", + "subject6", + ] + input_subjects.sex = ["F", 
"M", "M", "M", "F", "F"] + input_subjects.subject_birth_date = [ + "2020-01-01 00:00:01", + "2020-01-01 00:00:01", + "2020-01-01 00:00:01", + "2020-01-01 00:00:01", + "2020-01-01 00:00:01", + "2020-01-01 00:00:01", + ] + input_subjects.subject_description = [ + "dl56", + "SC035", + "SC038", + "oe_talab", + "rich", + "manuel", + ] + input_subjects = input_subjects.set_index("subject") + + subjects_csv_path = pathlib.Path("./tests/user_data/subjects.csv") + input_subjects.to_csv(subjects_csv_path) # write csv file yield input_subjects, subjects_csv_path - subjects_csv_path.unlink() # delete csv file after use + subjects_csv_path.unlink() # delete csv file after use @pytest.fixture def ingest_subjects(pipeline, subjects_csv): from workflow_array_ephys.ingest import ingest_subjects + _, subjects_csv_path = subjects_csv ingest_subjects(subjects_csv_path, verbose=verbose) return @@ -315,27 +372,42 @@ def ingest_subjects(pipeline, subjects_csv): @pytest.fixture def sessions_csv(test_data): - """ Create a 'sessions.csv' file""" - input_sessions = pd.DataFrame(columns=['subject', 'session_dir', 'session_note', - 'user']) - input_sessions.subject = ['subject1', 'subject2', 'subject2', - 'subject3', 'subject4', 'subject5', - 'subject6'] + """Create a 'sessions.csv' file""" + input_sessions = pd.DataFrame( + columns=["subject", "session_dir", "session_note", "user"] + ) + input_sessions.subject = [ + "subject1", + "subject2", + "subject2", + "subject3", + "subject4", + "subject5", + "subject6", + ] input_sessions.session_dir = sessions_dirs - input_sessions.session_note = ['Data collection notes', - 'Data collection notes', - 'Interrupted session', - 'Data collection notes', - 'Successful data collection', - 'Successful data collection', - 'Ambient temp abnormally low'] - input_sessions.user = ['User2', 'User2', 'User2', - 'User1', 'User2', 'User1', - 'User2'] - - input_sessions = input_sessions.set_index('subject') - - sessions_csv_path = pathlib.Path('./tests/user_data/sessions.csv') + input_sessions.session_note = [ + "Data collection notes", + "Data collection notes", + "Interrupted session", + "Data collection notes", + "Successful data collection", + "Successful data collection", + "Ambient temp abnormally low", + ] + input_sessions.user = [ + "User2", + "User2", + "User2", + "User1", + "User2", + "User1", + "User2", + ] + + input_sessions = input_sessions.set_index("subject") + + sessions_csv_path = pathlib.Path("./tests/user_data/sessions.csv") input_sessions.to_csv(sessions_csv_path) # write csv file yield input_sessions, sessions_csv_path @@ -346,6 +418,7 @@ def sessions_csv(test_data): @pytest.fixture def ingest_sessions(ingest_subjects, sessions_csv): from workflow_array_ephys.ingest import ingest_sessions + _, sessions_csv_path = sessions_csv ingest_sessions(sessions_csv_path, verbose=verbose) return @@ -353,34 +426,39 @@ def ingest_sessions(ingest_subjects, sessions_csv): @pytest.fixture def testdata_paths(): - """ Paths for test data 'subjectX/sessionY/probeZ/etc'""" + """Paths for test data 'subjectX/sessionY/probeZ/etc'""" return { - 'npx3A-p1-ks': 'subject5/session1/probe_1/ks2.1_01', - 'npx3A-p2-ks': 'subject5/session1/probe_2/ks2.1_01', - 'oe_npx3B-ks': 'subject4/experiment1/recording1/continuous/' - + 'Neuropix-PXI-100.0/ks', - 'sglx_npx3A-p1': 'subject5/session1/probe_1', - 'oe_npx3B': 'subject4/experiment1/recording1/continuous/' - + 'Neuropix-PXI-100.0', - 'sglx_npx3B-p1': 'subject6/session1/towersTask_g0_imec0', - 'npx3B-p1-ks': 'subject6/session1/towersTask_g0_imec0' + 
"npx3A-p1-ks": "subject5/session1/probe_1/ks2.1_01", + "npx3A-p2-ks": "subject5/session1/probe_2/ks2.1_01", + "oe_npx3B-ks": "subject4/experiment1/recording1/continuous/" + + "Neuropix-PXI-100.0/ks", + "sglx_npx3A-p1": "subject5/session1/probe_1", + "oe_npx3B": "subject4/experiment1/recording1/continuous/" + + "Neuropix-PXI-100.0", + "sglx_npx3B-p1": "subject6/session1/towersTask_g0_imec0", + "npx3B-p1-ks": "subject6/session1/towersTask_g0_imec0", } @pytest.fixture def ephys_insertionlocation(pipeline, ingest_sessions): """Insert probe location into ephys.InsertionLocation""" - ephys = pipeline['ephys'] - - for probe_insertion_key in ephys.ProbeInsertion.fetch('KEY'): - ephys.InsertionLocation.insert1(dict(**probe_insertion_key, - skull_reference='Bregma', - ap_location=0, - ml_location=0, - depth=0, - theta=0, - phi=0, - beta=0), skip_duplicates=True) + ephys = pipeline["ephys"] + + for probe_insertion_key in ephys.ProbeInsertion.fetch("KEY"): + ephys.InsertionLocation.insert1( + dict( + **probe_insertion_key, + skull_reference="Bregma", + ap_location=0, + ml_location=0, + depth=0, + theta=0, + phi=0, + beta=0, + ), + skip_duplicates=True, + ) yield if _tear_down: @@ -394,7 +472,7 @@ def ephys_insertionlocation(pipeline, ingest_sessions): @pytest.fixture def kilosort_paramset(pipeline): """Insert kilosort parameters into ephys.ClusteringParamset""" - ephys = pipeline['ephys'] + ephys = pipeline["ephys"] params_ks = { "fs": 30000, @@ -418,30 +496,31 @@ def kilosort_paramset(pipeline): "nSkipCov": 25, "scaleproc": 200, "nPCs": 3, - "useRAM": 0 + "useRAM": 0, } # Insert here, since most of the test will require this paramset inserted ephys.ClusteringParamSet.insert_new_params( - clustering_method='kilosort2.5', - paramset_desc='Spike sorting using Kilosort2.5', + clustering_method="kilosort2.5", + paramset_desc="Spike sorting using Kilosort2.5", params=params_ks, - paramset_idx=0) + paramset_idx=0, + ) yield params_ks if _tear_down: if verbose: - (ephys.ClusteringParamSet & 'paramset_idx = 0').delete() + (ephys.ClusteringParamSet & "paramset_idx = 0").delete() else: with QuietStdOut(): - (ephys.ClusteringParamSet & 'paramset_idx = 0').delete() + (ephys.ClusteringParamSet & "paramset_idx = 0").delete() @pytest.fixture def ephys_recordings(pipeline, ingest_sessions): """Populate ephys.EphysRecording""" - ephys = pipeline['ephys'] + ephys = pipeline["ephys"] ephys.EphysRecording.populate() @@ -458,19 +537,24 @@ def ephys_recordings(pipeline, ingest_sessions): @pytest.fixture def clustering_tasks(pipeline, kilosort_paramset, ephys_recordings): """Insert keys from ephys.EphysRecording into ephys.Clustering""" - ephys = pipeline['ephys'] + ephys = pipeline["ephys"] - for ephys_rec_key in (ephys.EphysRecording - ephys.ClusteringTask).fetch('KEY'): - ephys_file_path = pathlib.Path(((ephys.EphysRecording.EphysFile & ephys_rec_key - ).fetch('file_path'))[0]) + for ephys_rec_key in (ephys.EphysRecording - ephys.ClusteringTask).fetch("KEY"): + ephys_file_path = pathlib.Path( + ((ephys.EphysRecording.EphysFile & ephys_rec_key).fetch("file_path"))[0] + ) ephys_file = find_full_path(get_ephys_root_data_dir(), ephys_file_path) recording_dir = ephys_file.parent - kilosort_dir = next(recording_dir.rglob('spike_times.npy')).parent - ephys.ClusteringTask.insert1({**ephys_rec_key, - 'paramset_idx': 0, - 'task_mode': 'load', - 'clustering_output_dir': kilosort_dir.as_posix()}, - skip_duplicates=True) + kilosort_dir = next(recording_dir.rglob("spike_times.npy")).parent + ephys.ClusteringTask.insert1( + { + 
**ephys_rec_key, + "paramset_idx": 0, + "task_mode": "load", + "clustering_output_dir": kilosort_dir.as_posix(), + }, + skip_duplicates=True, + ) yield @@ -485,7 +569,7 @@ def clustering_tasks(pipeline, kilosort_paramset, ephys_recordings): @pytest.fixture def clustering(clustering_tasks, pipeline): """Populate ephys.Clustering""" - ephys = pipeline['ephys'] + ephys = pipeline["ephys"] ephys.Clustering.populate() @@ -502,14 +586,14 @@ def clustering(clustering_tasks, pipeline): @pytest.fixture def curations(clustering, pipeline): """Insert keys from ephys.ClusteringTask into ephys.Curation""" - ephys_mode = pipeline['ephys_mode'] + ephys_mode = pipeline["ephys_mode"] - if ephys_mode == 'no-curation': + if ephys_mode == "no-curation": yield else: - ephys = pipeline['ephys'] + ephys = pipeline["ephys"] - for key in (ephys.ClusteringTask - ephys.Curation).fetch('KEY'): + for key in (ephys.ClusteringTask - ephys.Curation).fetch("KEY"): ephys.Curation().create1_from_clustering_task(key) yield @@ -520,5 +604,3 @@ def curations(clustering, pipeline): else: with QuietStdOut(): ephys.Curation.delete() - - diff --git a/tests/test_export.py b/tests/test_export.py index af393cc5..ae029d16 100644 --- a/tests/test_export.py +++ b/tests/test_export.py @@ -4,14 +4,32 @@ from element_interface.utils import find_root_directory, find_full_path -from . import (dj_config, verbose, QuietStdOut, pipeline, test_data, - lab_csv, lab_project_csv, lab_user_csv, - lab_publications_csv, lab_keywords_csv, lab_protocol_csv, - lab_project_users_csv, ingest_lab, - subjects_csv, ingest_subjects, - sessions_csv, ingest_sessions, - testdata_paths, ephys_insertionlocation, kilosort_paramset, - ephys_recordings, clustering_tasks, clustering, curations) +from . import ( + dj_config, + verbose, + QuietStdOut, + pipeline, + test_data, + lab_csv, + lab_project_csv, + lab_user_csv, + lab_publications_csv, + lab_keywords_csv, + lab_protocol_csv, + lab_project_users_csv, + ingest_lab, + subjects_csv, + ingest_subjects, + sessions_csv, + ingest_sessions, + testdata_paths, + ephys_insertionlocation, + kilosort_paramset, + ephys_recordings, + clustering_tasks, + clustering, + curations, +) __all__ = [ "dj_config", @@ -38,21 +56,24 @@ "curations", ] -from workflow_array_ephys.export import (ecephys_session_to_nwb, - session_to_nwb, write_nwb) +from workflow_array_ephys.export import ( + ecephys_session_to_nwb, + session_to_nwb, + write_nwb, +) def test_session_to_nwb(pipeline, ingest_lab, ingest_subjects, ingest_sessions): session_kwargs = { - 'session_key':{ + "session_key": { "subject": "subject5", "session_datetime": datetime.datetime(2018, 7, 3, 20, 32, 28), }, - 'lab_key': {"lab": "LabA"}, - 'protocol_key':{"protocol": "ProtA"}, - 'project_key':{"project": "ProjA"}, + "lab_key": {"lab": "LabA"}, + "protocol_key": {"protocol": "ProtA"}, + "project_key": {"project": "ProjA"}, } - + if verbose: nwbfile = session_to_nwb(**session_kwargs) else: @@ -63,7 +84,8 @@ def test_session_to_nwb(pipeline, ingest_lab, ingest_subjects, ingest_sessions): assert nwbfile.session_description == "Successful data collection" # when saved in NWB, converts local to UTC assert nwbfile.session_start_time == datetime.datetime( - 2018, 7, 3, 20, 32, 28).astimezone(datetime.timezone.utc) + 2018, 7, 3, 20, 32, 28 + ).astimezone(datetime.timezone.utc) assert nwbfile.experimenter == ["User1"] assert nwbfile.subject.subject_id == "subject5" @@ -78,24 +100,33 @@ def test_session_to_nwb(pipeline, ingest_lab, ingest_subjects, ingest_sessions): assert 
nwbfile.experiment_description == "Example project to populate element-lab" -def test_write_to_nwb(pipeline, ingest_lab, ingest_subjects, ingest_sessions, - ephys_insertionlocation, kilosort_paramset, ephys_recordings, - clustering_tasks, clustering, curations): - ephys = pipeline['ephys'] +def test_write_to_nwb( + pipeline, + ingest_lab, + ingest_subjects, + ingest_sessions, + ephys_insertionlocation, + kilosort_paramset, + ephys_recordings, + clustering_tasks, + clustering, + curations, +): + ephys = pipeline["ephys"] - session_key = dict(subject='subject5', session_datetime='2018-07-03 20:32:28') + session_key = dict(subject="subject5", session_datetime="2018-07-03 20:32:28") ephys.LFP.populate(session_key, display_progress=verbose) ephys.CuratedClustering.populate(session_key, display_progress=verbose) ephys.WaveformSet.populate(session_key, display_progress=verbose) ecephys_kwargs = { - 'session_key':session_key, - 'raw':True, - 'spikes':True, - 'lfp':"dj", + "session_key": session_key, + "raw": True, + "spikes": True, + "lfp": "dj", } - + if verbose: nwbfile = ecephys_session_to_nwb(**ecephys_kwargs) else: @@ -110,27 +141,36 @@ def test_write_to_nwb(pipeline, ingest_lab, ingest_subjects, ingest_sessions, (pipeline["session"].SessionDirectory & session_key).fetch1("session_dir"), ), ) - + write_nwb(nwbfile, root_dir / time.strftime("_test_%Y%m%d-%H%M%S.nwb")) -def test_convert_to_nwb(pipeline, ingest_lab, ingest_subjects, ingest_sessions, - ephys_insertionlocation, kilosort_paramset, ephys_recordings, - clustering_tasks, clustering, curations): - ephys = pipeline['ephys'] - - session_key = dict(subject='subject5', session_datetime='2018-07-03 20:32:28') - +def test_convert_to_nwb( + pipeline, + ingest_lab, + ingest_subjects, + ingest_sessions, + ephys_insertionlocation, + kilosort_paramset, + ephys_recordings, + clustering_tasks, + clustering, + curations, +): + ephys = pipeline["ephys"] + + session_key = dict(subject="subject5", session_datetime="2018-07-03 20:32:28") + ephys.CuratedClustering.populate(session_key, display_progress=verbose) ephys.WaveformSet.populate(session_key, display_progress=verbose) - + ecephys_kwargs = { - 'session_key':session_key, - 'end_frame':1000, - 'spikes':True, - 'lab_key': {"lab": "LabA"}, - 'protocol_key':{"protocol": "ProtA"}, - 'project_key':{"project": "ProjA"}, + "session_key": session_key, + "end_frame": 1000, + "spikes": True, + "lab_key": {"lab": "LabA"}, + "protocol_key": {"protocol": "ProtA"}, + "project_key": {"project": "ProjA"}, } if verbose: @@ -141,26 +181,26 @@ def test_convert_to_nwb(pipeline, ingest_lab, ingest_subjects, ingest_sessions, for x in ("262716621", "714000838"): assert x in nwbfile.devices - + assert len(nwbfile.electrodes) == 1920 for col in ("shank", "shank_row", "shank_col"): assert col in nwbfile.electrodes - + for es_name in ("ElectricalSeries1", "ElectricalSeries2"): es = nwbfile.acquisition[es_name] assert isinstance(es, ElectricalSeries) assert es.conversion == 2.34375e-06 - + # make sure the ElectricalSeries objects don't share electrodes assert not set(nwbfile.acquisition["ElectricalSeries1"].electrodes.data) & set( nwbfile.acquisition["ElectricalSeries2"].electrodes.data ) - - assert len(nwbfile.units) == 499 + + assert len(nwbfile.units) == 499 for col in ("cluster_quality_label", "spike_depths"): assert col in nwbfile.units - + for es_name in ("ElectricalSeries1", "ElectricalSeries2"): es = nwbfile.processing["ecephys"].data_interfaces["LFP"][es_name] assert isinstance(es, ElectricalSeries) diff --git 
a/tests/test_ingest.py b/tests/test_ingest.py index b75bf3da..d329f967 100644 --- a/tests/test_ingest.py +++ b/tests/test_ingest.py @@ -1,28 +1,50 @@ import sys import pathlib -from . import (dj_config, pipeline, test_data, - subjects_csv, ingest_subjects, - sessions_csv, ingest_sessions, - testdata_paths, kilosort_paramset, - ephys_recordings, clustering_tasks, clustering, curations) +from . import ( + dj_config, + pipeline, + test_data, + subjects_csv, + ingest_subjects, + sessions_csv, + ingest_sessions, + testdata_paths, + kilosort_paramset, + ephys_recordings, + clustering_tasks, + clustering, + curations, +) # Set all to pass linter warning: PEP8 F811 -__all__ = ['dj_config', 'pipeline', 'test_data', 'subjects_csv', 'ingest_subjects', - 'sessions_csv', 'ingest_sessions', 'testdata_paths', 'kilosort_paramset', - 'ephys_recordings', 'clustering_tasks', 'clustering', 'curations'] +__all__ = [ + "dj_config", + "pipeline", + "test_data", + "subjects_csv", + "ingest_subjects", + "sessions_csv", + "ingest_sessions", + "testdata_paths", + "kilosort_paramset", + "ephys_recordings", + "clustering_tasks", + "clustering", + "curations", +] def test_ingest_subjects(pipeline, ingest_subjects): - """ Check number of subjects inserted into the `subject.Subject` table """ - subject = pipeline['subject'] + """Check number of subjects inserted into the `subject.Subject` table""" + subject = pipeline["subject"] assert len(subject.Subject()) == 6 def test_ingest_sessions(pipeline, sessions_csv, ingest_sessions): - ephys = pipeline['ephys'] - probe = pipeline['probe'] - session = pipeline['session'] + ephys = pipeline["ephys"] + probe = pipeline["probe"] + session = pipeline["session"] assert len(session.Session()) == 7 assert len(probe.Probe()) == 9 @@ -31,35 +53,39 @@ def test_ingest_sessions(pipeline, sessions_csv, ingest_sessions): sessions, _ = sessions_csv sess = sessions.iloc[0] - assert (session.SessionDirectory - & {'subject': sess.name}).fetch1('session_dir') == sess.session_dir + assert (session.SessionDirectory & {"subject": sess.name}).fetch1( + "session_dir" + ) == sess.session_dir def test_find_valid_full_path(pipeline, sessions_csv): from element_interface.utils import find_full_path - get_ephys_root_data_dir = pipeline['get_ephys_root_data_dir'] - ephys_root_data_dir = ([get_ephys_root_data_dir()] - if not isinstance(get_ephys_root_data_dir(), list) - else get_ephys_root_data_dir()) + get_ephys_root_data_dir = pipeline["get_ephys_root_data_dir"] + ephys_root_data_dir = ( + [get_ephys_root_data_dir()] + if not isinstance(get_ephys_root_data_dir(), list) + else get_ephys_root_data_dir() + ) # add more options for root directories - if sys.platform == 'win32': # win32 even if Windows 64-bit - ephys_root_data_dir = ephys_root_data_dir + ['J:/', 'M:/'] + if sys.platform == "win32": # win32 even if Windows 64-bit + ephys_root_data_dir = ephys_root_data_dir + ["J:/", "M:/"] else: - ephys_root_data_dir = ephys_root_data_dir + ['mnt/j', 'mnt/m'] + ephys_root_data_dir = ephys_root_data_dir + ["mnt/j", "mnt/m"] # test: providing relative-path: correctly search for the full-path sessions, _ = sessions_csv sess = sessions.iloc[0] session_full_path = find_full_path(ephys_root_data_dir, sess.session_dir) - docker_full_path = pathlib.Path('/main/test_data/workflow_ephys_data1/' - + 'subject1/session1') + docker_full_path = pathlib.Path( + "/main/test_data/workflow_ephys_data1/" + "subject1/session1" + ) - assert docker_full_path == session_full_path, str('Session path does not match ' - + 'docker root: ' 
- + f'{docker_full_path}') + assert docker_full_path == session_full_path, str( + "Session path does not match " + "docker root: " + f"{docker_full_path}" + ) def test_find_root_directory(pipeline, sessions_csv): @@ -68,35 +94,39 @@ def test_find_root_directory(pipeline, sessions_csv): """ from element_interface.utils import find_root_directory - get_ephys_root_data_dir = pipeline['get_ephys_root_data_dir'] - ephys_root_data_dir = ([get_ephys_root_data_dir()] - if not isinstance(get_ephys_root_data_dir(), list) - else get_ephys_root_data_dir()) + get_ephys_root_data_dir = pipeline["get_ephys_root_data_dir"] + ephys_root_data_dir = ( + [get_ephys_root_data_dir()] + if not isinstance(get_ephys_root_data_dir(), list) + else get_ephys_root_data_dir() + ) # add more options for root directories - if sys.platform == 'win32': - ephys_root_data_dir = ephys_root_data_dir + ['J:/', 'M:/'] + if sys.platform == "win32": + ephys_root_data_dir = ephys_root_data_dir + ["J:/", "M:/"] else: - ephys_root_data_dir = ephys_root_data_dir + ['mnt/j', 'mnt/m'] + ephys_root_data_dir = ephys_root_data_dir + ["mnt/j", "mnt/m"] # test: providing full-path: correctly search for the root_dir sessions, _ = sessions_csv sess = sessions.iloc[0] # set to /main/, will only work in docker environment - session_full_path = pathlib.Path('/main/test_data/workflow_ephys_data1', - sess.session_dir) + session_full_path = pathlib.Path( + "/main/test_data/workflow_ephys_data1", sess.session_dir + ) root_dir = find_root_directory(ephys_root_data_dir, session_full_path) - assert root_dir.as_posix() == '/main/test_data/workflow_ephys_data1',\ - 'Root path does not match: /main/test_data/workflow_ephys_data1' + assert ( + root_dir.as_posix() == "/main/test_data/workflow_ephys_data1" + ), "Root path does not match: /main/test_data/workflow_ephys_data1" def test_paramset_insert(kilosort_paramset, pipeline): - ephys = pipeline['ephys'] + ephys = pipeline["ephys"] from element_interface.utils import dict_to_uuid - method, desc, paramset_hash = (ephys.ClusteringParamSet - & {'paramset_idx': 0}).fetch1( - 'clustering_method', 'paramset_desc', 'param_set_hash') - assert method == 'kilosort2.5' - assert desc == 'Spike sorting using Kilosort2.5' + method, desc, paramset_hash = ( + ephys.ClusteringParamSet & {"paramset_idx": 0} + ).fetch1("clustering_method", "paramset_desc", "param_set_hash") + assert method == "kilosort2.5" + assert desc == "Spike sorting using Kilosort2.5" assert dict_to_uuid(kilosort_paramset) == paramset_hash diff --git a/tests/test_populate.py b/tests/test_populate.py index c082cbbe..fdbfc690 100644 --- a/tests/test_populate.py +++ b/tests/test_populate.py @@ -1,18 +1,40 @@ import numpy as np -__all__ = ['dj_config', 'pipeline', 'test_data', 'subjects_csv', 'ingest_subjects', - 'sessions_csv', 'ingest_sessions', 'testdata_paths', 'kilosort_paramset', - 'ephys_recordings', 'clustering_tasks', 'clustering', 'curations'] - -from . import (dj_config, pipeline, test_data, - subjects_csv, ingest_subjects, - sessions_csv, ingest_sessions, - testdata_paths, kilosort_paramset, - ephys_recordings, clustering_tasks, clustering, curations) +__all__ = [ + "dj_config", + "pipeline", + "test_data", + "subjects_csv", + "ingest_subjects", + "sessions_csv", + "ingest_sessions", + "testdata_paths", + "kilosort_paramset", + "ephys_recordings", + "clustering_tasks", + "clustering", + "curations", +] + +from . 
import ( + dj_config, + pipeline, + test_data, + subjects_csv, + ingest_subjects, + sessions_csv, + ingest_sessions, + testdata_paths, + kilosort_paramset, + ephys_recordings, + clustering_tasks, + clustering, + curations, +) def test_ephys_recording_populate(pipeline, ephys_recordings): - ephys = pipeline['ephys'] + ephys = pipeline["ephys"] assert len(ephys.EphysRecording()) == 13 @@ -21,45 +43,135 @@ def test_LFP_populate_npx3B_OpenEphys(testdata_paths, pipeline, ephys_recordings Populate ephys.LFP with OpenEphys items, recording Neuropixels Phase 3B (Neuropixels 1.0) probe """ - ephys = pipeline['ephys'] - rel_path = testdata_paths['oe_npx3B'] - rec_key = (ephys.EphysRecording & (ephys.EphysRecording.EphysFile - & f'file_path LIKE "%{rel_path}"') - ).fetch1('KEY') + ephys = pipeline["ephys"] + rel_path = testdata_paths["oe_npx3B"] + rec_key = ( + ephys.EphysRecording + & (ephys.EphysRecording.EphysFile & f'file_path LIKE "%{rel_path}"') + ).fetch1("KEY") ephys.LFP.populate(rec_key) - lfp_mean = (ephys.LFP & rec_key).fetch1('lfp_mean') + lfp_mean = (ephys.LFP & rec_key).fetch1("lfp_mean") assert len(lfp_mean) == 520054 - electrodes = (ephys.LFP.Electrode & rec_key).fetch('electrode') + electrodes = (ephys.LFP.Electrode & rec_key).fetch("electrode") assert np.array_equal( electrodes, - np.array([5, 14, 23, 32, 41, 50, 59, 68, 77, 86, 95, 104, - 113, 122, 131, 140, 149, 158, 167, 176, 185, 194, 203, 212, - 221, 230, 239, 248, 257, 266, 275, 284, 293, 302, 311, 320, - 329, 338, 347, 356, 365, 374, 383])) + np.array( + [ + 5, + 14, + 23, + 32, + 41, + 50, + 59, + 68, + 77, + 86, + 95, + 104, + 113, + 122, + 131, + 140, + 149, + 158, + 167, + 176, + 185, + 194, + 203, + 212, + 221, + 230, + 239, + 248, + 257, + 266, + 275, + 284, + 293, + 302, + 311, + 320, + 329, + 338, + 347, + 356, + 365, + 374, + 383, + ] + ), + ) def test_LFP_populate_npx3A_SpikeGLX(testdata_paths, pipeline, ephys_recordings): """Populate ephys.LFP with SpikeGLX items, recording Neuropixels Phase 3A probe""" - ephys = pipeline['ephys'] + ephys = pipeline["ephys"] - rel_path = testdata_paths['sglx_npx3A-p1'] - rec_key = (ephys.EphysRecording & (ephys.EphysRecording.EphysFile - & f'file_path LIKE "%{rel_path}%"') - ).fetch1('KEY') + rel_path = testdata_paths["sglx_npx3A-p1"] + rec_key = ( + ephys.EphysRecording + & (ephys.EphysRecording.EphysFile & f'file_path LIKE "%{rel_path}%"') + ).fetch1("KEY") ephys.LFP.populate(rec_key) - lfp_mean = (ephys.LFP & rec_key).fetch1('lfp_mean') + lfp_mean = (ephys.LFP & rec_key).fetch1("lfp_mean") assert len(lfp_mean) == 846666 - electrodes = (ephys.LFP.Electrode & rec_key).fetch('electrode') + electrodes = (ephys.LFP.Electrode & rec_key).fetch("electrode") assert np.array_equal( electrodes, - np.array([5, 14, 23, 32, 41, 50, 59, 68, 77, 86, 95, 104, - 113, 122, 131, 140, 149, 158, 167, 176, 185, 194, 203, 212, - 221, 230, 239, 248, 257, 266, 275, 284, 293, 302, 311, 320, - 329, 338, 347, 356, 365, 374, 383])) + np.array( + [ + 5, + 14, + 23, + 32, + 41, + 50, + 59, + 68, + 77, + 86, + 95, + 104, + 113, + 122, + 131, + 140, + 149, + 158, + 167, + 176, + 185, + 194, + 203, + 212, + 221, + 230, + 239, + 248, + 257, + 266, + 275, + 284, + 293, + 302, + 311, + 320, + 329, + 338, + 347, + 356, + 365, + 374, + 383, + ] + ), + ) def test_LFP_populate_npx3B_SpikeGLX(testdata_paths, pipeline, ephys_recordings): @@ -68,52 +180,115 @@ def test_LFP_populate_npx3B_SpikeGLX(testdata_paths, pipeline, ephys_recordings) recording Neuropixels Phase 3B (Neuropixels 1.0) probe """ - ephys = 
pipeline['ephys'] + ephys = pipeline["ephys"] - rel_path = testdata_paths['sglx_npx3B-p1'] - rec_key = (ephys.EphysRecording & (ephys.EphysRecording.EphysFile - & f'file_path LIKE "%{rel_path}%"') - ).fetch1('KEY') + rel_path = testdata_paths["sglx_npx3B-p1"] + rec_key = ( + ephys.EphysRecording + & (ephys.EphysRecording.EphysFile & f'file_path LIKE "%{rel_path}%"') + ).fetch1("KEY") ephys.LFP.populate(rec_key) - lfp_mean = (ephys.LFP & rec_key).fetch1('lfp_mean') + lfp_mean = (ephys.LFP & rec_key).fetch1("lfp_mean") assert len(lfp_mean) == 4769946 - electrodes = (ephys.LFP.Electrode & rec_key).fetch('electrode') + electrodes = (ephys.LFP.Electrode & rec_key).fetch("electrode") assert np.array_equal( electrodes, - np.array([5, 14, 23, 32, 41, 50, 59, 68, 77, 86, 95, 104, - 113, 122, 131, 140, 149, 158, 167, 176, 185, 194, 203, 212, - 221, 230, 239, 248, 257, 266, 275, 284, 293, 302, 311, 320, - 329, 338, 347, 356, 365, 374, 383])) + np.array( + [ + 5, + 14, + 23, + 32, + 41, + 50, + 59, + 68, + 77, + 86, + 95, + 104, + 113, + 122, + 131, + 140, + 149, + 158, + 167, + 176, + 185, + 194, + 203, + 212, + 221, + 230, + 239, + 248, + 257, + 266, + 275, + 284, + 293, + 302, + 311, + 320, + 329, + 338, + 347, + 356, + 365, + 374, + 383, + ] + ), + ) def test_clustering_populate(clustering, pipeline): - ephys = pipeline['ephys'] + ephys = pipeline["ephys"] assert len(ephys.Clustering()) == 13 def test_curated_clustering_populate(curations, pipeline, testdata_paths): """Populate ephys.CuratedClustering with multiple recordings""" - ephys = pipeline['ephys'] + ephys = pipeline["ephys"] - rel_path = testdata_paths['npx3A-p1-ks'] + rel_path = testdata_paths["npx3A-p1-ks"] curation_key = _get_curation_key(rel_path, pipeline) ephys.CuratedClustering.populate(curation_key) - assert len(ephys.CuratedClustering.Unit & curation_key - & 'cluster_quality_label = "good"') == 76 - - rel_path = testdata_paths['oe_npx3B-ks'] + assert ( + len( + ephys.CuratedClustering.Unit + & curation_key + & 'cluster_quality_label = "good"' + ) + == 76 + ) + + rel_path = testdata_paths["oe_npx3B-ks"] curation_key = _get_curation_key(rel_path, pipeline) ephys.CuratedClustering.populate(curation_key) - assert len(ephys.CuratedClustering.Unit & curation_key - & 'cluster_quality_label = "good"') == 68 - - rel_path = testdata_paths['npx3B-p1-ks'] + assert ( + len( + ephys.CuratedClustering.Unit + & curation_key + & 'cluster_quality_label = "good"' + ) + == 68 + ) + + rel_path = testdata_paths["npx3B-p1-ks"] curation_key = _get_curation_key(rel_path, pipeline) ephys.CuratedClustering.populate(curation_key) - assert len(ephys.CuratedClustering.Unit & curation_key - & 'cluster_quality_label = "good"') == 55 + assert ( + len( + ephys.CuratedClustering.Unit + & curation_key + & 'cluster_quality_label = "good"' + ) + == 55 + ) def test_waveform_populate_npx3B_OpenEphys(curations, pipeline, testdata_paths): @@ -121,14 +296,15 @@ def test_waveform_populate_npx3B_OpenEphys(curations, pipeline, testdata_paths): Populate ephys.WaveformSet with OpenEphys Neuropixels Phase 3B (Neuropixels 1.0) probe """ - ephys = pipeline['ephys'] - rel_path = testdata_paths['oe_npx3B-ks'] + ephys = pipeline["ephys"] + rel_path = testdata_paths["oe_npx3B-ks"] curation_key = _get_curation_key(rel_path, pipeline) ephys.CuratedClustering.populate(curation_key) ephys.WaveformSet.populate(curation_key) - waveforms = np.vstack((ephys.WaveformSet.PeakWaveform & curation_key - ).fetch('peak_electrode_waveform')) + waveforms = np.vstack( + (ephys.WaveformSet.PeakWaveform 
& curation_key).fetch("peak_electrode_waveform") + ) assert waveforms.shape == (204, 64) @@ -139,30 +315,34 @@ def test_waveform_populate_npx3B_SpikeGLX(curations, pipeline, testdata_paths): Neuropixels Phase 3B (Neuropixels 1.0) probe """ - ephys = pipeline['ephys'] + ephys = pipeline["ephys"] - rel_path = testdata_paths['npx3B-p1-ks'] + rel_path = testdata_paths["npx3B-p1-ks"] curation_key = _get_curation_key(rel_path, pipeline) ephys.CuratedClustering.populate(curation_key) ephys.WaveformSet.populate(curation_key) - waveforms = np.vstack((ephys.WaveformSet.PeakWaveform - & curation_key).fetch('peak_electrode_waveform')) + waveforms = np.vstack( + (ephys.WaveformSet.PeakWaveform & curation_key).fetch("peak_electrode_waveform") + ) assert waveforms.shape == (150, 64) # ---- HELPER FUNCTIONS ---- + def _get_curation_key(output_relative_path, pipeline): - ephys = pipeline['ephys'] - ephys_mode = pipeline['ephys_mode'] + ephys = pipeline["ephys"] + ephys_mode = pipeline["ephys_mode"] - if ephys_mode == 'no-curation': + if ephys_mode == "no-curation": EphysCuration = ephys.ClusteringTask - output_dir_attr_name = 'clustering_output_dir' + output_dir_attr_name = "clustering_output_dir" else: EphysCuration = ephys.Curation - output_dir_attr_name = 'curation_output_dir' + output_dir_attr_name = "curation_output_dir" - return (EphysCuration & f'{output_dir_attr_name} LIKE "%{output_relative_path}"').fetch1('KEY') + return ( + EphysCuration & f'{output_dir_attr_name} LIKE "%{output_relative_path}"' + ).fetch1("KEY") diff --git a/workflow_array_ephys/analysis.py b/workflow_array_ephys/analysis.py index ecae583e..c71fc3f3 100644 --- a/workflow_array_ephys/analysis.py +++ b/workflow_array_ephys/analysis.py @@ -5,7 +5,7 @@ __all__ = ["db_prefix", "ephys", "trial", "event"] -schema = dj.schema(db_prefix + 'analysis') +schema = dj.schema(db_prefix + "analysis") @schema @@ -51,44 +51,58 @@ class UnitPSTH(dj.Part): """ def make(self, key): - unit_keys, unit_spike_times = (ephys.CuratedClustering.Unit & key - ).fetch('KEY', 'spike_times', order_by='unit') - bin_size = (SpikesAlignmentCondition & key).fetch1('bin_size') + unit_keys, unit_spike_times = (ephys.CuratedClustering.Unit & key).fetch( + "KEY", "spike_times", order_by="unit" + ) + bin_size = (SpikesAlignmentCondition & key).fetch1("bin_size") trialized_event_times = trial.get_trialized_alignment_event_times( - key, trial.Trial & (SpikesAlignmentCondition.Trial & key)) + key, trial.Trial & (SpikesAlignmentCondition.Trial & key) + ) min_limit = (trialized_event_times.event - trialized_event_times.start).max() max_limit = (trialized_event_times.end - trialized_event_times.event).max() # Spike raster aligned_trial_spikes = [] - units_spike_raster = {u['unit']: {**key, **u, 'aligned_spikes': [] - } for u in unit_keys} + units_spike_raster = { + u["unit"]: {**key, **u, "aligned_spikes": []} for u in unit_keys + } for _, r in trialized_event_times.iterrows(): if np.isnan(r.event): continue alignment_start_time = r.event - min_limit alignment_end_time = r.event + max_limit for unit_key, spikes in zip(unit_keys, unit_spike_times): - aligned_spikes = spikes[(alignment_start_time <= spikes) - & (spikes < alignment_end_time)] - r.event - aligned_trial_spikes.append({**key, **unit_key, - **r.trial_key, - 'aligned_spike_times': aligned_spikes}) - units_spike_raster[unit_key['unit']]['aligned_spikes' - ].append(aligned_spikes) + aligned_spikes = ( + spikes[ + (alignment_start_time <= spikes) & (spikes < alignment_end_time) + ] + - r.event + ) + 
aligned_trial_spikes.append( + { + **key, + **unit_key, + **r.trial_key, + "aligned_spike_times": aligned_spikes, + } + ) + units_spike_raster[unit_key["unit"]]["aligned_spikes"].append( + aligned_spikes + ) # PSTH for unit_spike_raster in units_spike_raster.values(): - spikes = np.concatenate(unit_spike_raster['aligned_spikes']) + spikes = np.concatenate(unit_spike_raster["aligned_spikes"]) - psth, edges = np.histogram(spikes, - bins=np.arange(-min_limit, max_limit, bin_size)) - unit_spike_raster['psth'] = (psth - / len(unit_spike_raster.pop('aligned_spikes')) - / bin_size) - unit_spike_raster['psth_edges'] = edges[1:] + psth, edges = np.histogram( + spikes, bins=np.arange(-min_limit, max_limit, bin_size) + ) + unit_spike_raster["psth"] = ( + psth / len(unit_spike_raster.pop("aligned_spikes")) / bin_size + ) + unit_spike_raster["psth_edges"] = edges[1:] self.insert1(key) self.AlignedTrialSpikes.insert(aligned_trial_spikes) @@ -103,17 +117,22 @@ def plot(self, key, unit, axs=None): fig, axs = plt.subplots(2, 1, figsize=(12, 8)) bin_size = (SpikesAlignmentCondition & key).fetch1("bin_size") - trial_ids, aligned_spikes = (self.AlignedTrialSpikes - & key & {'unit': unit} - ).fetch('trial_id', 'aligned_spike_times') - psth, psth_edges = (self.UnitPSTH & key & {'unit': unit}).fetch1( - 'psth', 'psth_edges') + trial_ids, aligned_spikes = ( + self.AlignedTrialSpikes & key & {"unit": unit} + ).fetch("trial_id", "aligned_spike_times") + psth, psth_edges = (self.UnitPSTH & key & {"unit": unit}).fetch1( + "psth", "psth_edges" + ) xlim = psth_edges[0], psth_edges[-1] - plot_psth._plot_spike_raster(aligned_spikes, trial_ids=trial_ids, ax=axs[0], - title=f'{dict(**key, unit=unit)}', xlim=xlim) - plot_psth._plot_psth(psth, psth_edges, bin_size, ax=axs[1], - title='', xlim=xlim) + plot_psth._plot_spike_raster( + aligned_spikes, + trial_ids=trial_ids, + ax=axs[0], + title=f"{dict(**key, unit=unit)}", + xlim=xlim, + ) + plot_psth._plot_psth(psth, psth_edges, bin_size, ax=axs[1], title="", xlim=xlim) return fig diff --git a/workflow_array_ephys/export.py b/workflow_array_ephys/export.py index 1f8307e3..a2f5e0f1 100644 --- a/workflow_array_ephys/export.py +++ b/workflow_array_ephys/export.py @@ -6,5 +6,10 @@ # Import ephys NWB export functions from element_array_ephys.export.nwb import ecephys_session_to_nwb, write_nwb -__all__ = ['element_lab_to_nwb_dict', 'subject_to_nwb', 'session_to_nwb', - 'ecephys_session_to_nwb', 'write_nwb'] +__all__ = [ + "element_lab_to_nwb_dict", + "subject_to_nwb", + "session_to_nwb", + "ecephys_session_to_nwb", + "write_nwb", +] diff --git a/workflow_array_ephys/ingest.py b/workflow_array_ephys/ingest.py index 0b2c6b35..5fcdef90 100644 --- a/workflow_array_ephys/ingest.py +++ b/workflow_array_ephys/ingest.py @@ -1,23 +1,36 @@ import csv import re -from workflow_array_ephys.pipeline import lab, subject, ephys, probe, session, trial, \ - event +from workflow_array_ephys.pipeline import ( + lab, + subject, + ephys, + probe, + session, + trial, + event, +) from workflow_array_ephys.paths import get_ephys_root_data_dir from element_array_ephys.readers import spikeglx, openephys -from element_interface.utils import find_root_directory, find_full_path, \ - ingest_csv_to_table - - -def ingest_lab(lab_csv_path='./user_data/lab/labs.csv', - project_csv_path='./user_data/lab/projects.csv', - publication_csv_path='./user_data/lab/publications.csv', - keyword_csv_path='./user_data/lab/keywords.csv', - protocol_csv_path='./user_data/lab/protocols.csv', - 
users_csv_path='./user_data/lab/users.csv', - project_user_csv_path='./user_data/lab/project_users.csv', - skip_duplicates=True, verbose=True): +from element_interface.utils import ( + find_root_directory, + find_full_path, + ingest_csv_to_table, +) + + +def ingest_lab( + lab_csv_path="./user_data/lab/labs.csv", + project_csv_path="./user_data/lab/projects.csv", + publication_csv_path="./user_data/lab/publications.csv", + keyword_csv_path="./user_data/lab/keywords.csv", + protocol_csv_path="./user_data/lab/protocols.csv", + users_csv_path="./user_data/lab/users.csv", + project_user_csv_path="./user_data/lab/project_users.csv", + skip_duplicates=True, + verbose=True, +): """ Inserts data from a CSVs into their corresponding lab schema tables. By default, uses data from workflow/user_data/lab/ @@ -33,164 +46,231 @@ def ingest_lab(lab_csv_path='./user_data/lab/labs.csv', """ # List with repeats for when mult dj.tables fed by same CSV - csvs = [lab_csv_path, lab_csv_path, - project_csv_path, project_csv_path, - publication_csv_path, keyword_csv_path, - protocol_csv_path, protocol_csv_path, - users_csv_path, users_csv_path, users_csv_path, - project_user_csv_path] - tables = [lab.Lab(), lab.Location(), - lab.Project(), lab.ProjectSourceCode(), - lab.ProjectPublication(), lab.ProjectKeywords(), - lab.ProtocolType(), lab.Protocol(), - lab.UserRole(), lab.User(), lab.LabMembership(), - lab.ProjectUser()] + csvs = [ + lab_csv_path, + lab_csv_path, + project_csv_path, + project_csv_path, + publication_csv_path, + keyword_csv_path, + protocol_csv_path, + protocol_csv_path, + users_csv_path, + users_csv_path, + users_csv_path, + project_user_csv_path, + ] + tables = [ + lab.Lab(), + lab.Location(), + lab.Project(), + lab.ProjectSourceCode(), + lab.ProjectPublication(), + lab.ProjectKeywords(), + lab.ProtocolType(), + lab.Protocol(), + lab.UserRole(), + lab.User(), + lab.LabMembership(), + lab.ProjectUser(), + ] ingest_csv_to_table(csvs, tables, skip_duplicates=skip_duplicates, verbose=verbose) -def ingest_subjects(subject_csv_path='./user_data/subjects.csv', verbose=True): +def ingest_subjects(subject_csv_path="./user_data/subjects.csv", verbose=True): """ Ingest subjects listed in the subject column of ./user_data/subjects.csv """ # -------------- Insert new "Subject" -------------- - with open(subject_csv_path, newline='') as f: - input_subjects = list(csv.DictReader(f, delimiter=',')) + with open(subject_csv_path, newline="") as f: + input_subjects = list(csv.DictReader(f, delimiter=",")) if verbose: previous_length = len(subject.Subject.fetch()) subject.Subject.insert(input_subjects, skip_duplicates=True) if verbose: insert_length = len(subject.Subject.fetch()) - previous_length - print(f'\n---- Insert {insert_length} entry(s) into ' - + 'subject.Subject ----') - print('\n---- Successfully completed ingest_subjects ----') + print(f"\n---- Insert {insert_length} entry(s) into " + "subject.Subject ----") + print("\n---- Successfully completed ingest_subjects ----") -def ingest_sessions(session_csv_path='./user_data/sessions.csv', verbose=True): +def ingest_sessions(session_csv_path="./user_data/sessions.csv", verbose=True): """ Ingests SpikeGLX and OpenEphys files from directories listed in the session_dir column of ./user_data/sessions.csv """ # ---------- Insert new "Session" and "ProbeInsertion" --------- - with open(session_csv_path, newline='') as f: - input_sessions = list(csv.DictReader(f, delimiter=',')) + with open(session_csv_path, newline="") as f: + input_sessions = 
list(csv.DictReader(f, delimiter=",")) # Folder structure: root / subject / session / probe / .ap.meta - session_list, session_dir_list, = [], [] + session_list, session_dir_list, = ( + [], + [], + ) session_note_list, session_experimenter_list = [], [] probe_list, probe_insertion_list = [], [] for sess in input_sessions: - session_dir = find_full_path(get_ephys_root_data_dir(), - sess['session_dir']) + session_dir = find_full_path(get_ephys_root_data_dir(), sess["session_dir"]) session_datetimes, insertions = [], [] # search session dir and determine acquisition software - for ephys_pattern, ephys_acq_type in zip(['*.ap.meta', '*.oebin'], - ['SpikeGLX', 'OpenEphys']): + for ephys_pattern, ephys_acq_type in zip( + ["*.ap.meta", "*.oebin"], ["SpikeGLX", "OpenEphys"] + ): ephys_meta_filepaths = [fp for fp in session_dir.rglob(ephys_pattern)] if len(ephys_meta_filepaths): acq_software = ephys_acq_type break else: - raise FileNotFoundError('Ephys recording data not found! Neither SpikeGLX ' - + 'nor OpenEphys recording files found in: ' - + f'{session_dir}') + raise FileNotFoundError( + "Ephys recording data not found! Neither SpikeGLX " + + "nor OpenEphys recording files found in: " + + f"{session_dir}" + ) - if acq_software == 'SpikeGLX': + if acq_software == "SpikeGLX": for meta_filepath in ephys_meta_filepaths: spikeglx_meta = spikeglx.SpikeGLXMeta(meta_filepath) - probe_key = {'probe_type': spikeglx_meta.probe_model, - 'probe': spikeglx_meta.probe_SN} - if (probe_key['probe'] not in [p['probe'] for p in probe_list - ] and probe_key not in probe.Probe()): + probe_key = { + "probe_type": spikeglx_meta.probe_model, + "probe": spikeglx_meta.probe_SN, + } + if ( + probe_key["probe"] not in [p["probe"] for p in probe_list] + and probe_key not in probe.Probe() + ): probe_list.append(probe_key) probe_dir = meta_filepath.parent - probe_number = re.search('(imec)?\d{1}$', probe_dir.name - ).group() - probe_number = int(probe_number.replace('imec', '')) + probe_number = re.search("(imec)?\d{1}$", probe_dir.name).group() + probe_number = int(probe_number.replace("imec", "")) - insertions.append({'probe': spikeglx_meta.probe_SN, - 'insertion_number': int(probe_number)}) + insertions.append( + { + "probe": spikeglx_meta.probe_SN, + "insertion_number": int(probe_number), + } + ) session_datetimes.append(spikeglx_meta.recording_time) - elif acq_software == 'OpenEphys': + elif acq_software == "OpenEphys": loaded_oe = openephys.OpenEphys(session_dir) session_datetimes.append(loaded_oe.experiment.datetime) for probe_idx, oe_probe in enumerate(loaded_oe.probes.values()): - probe_key = {'probe_type': oe_probe.probe_model, - 'probe': oe_probe.probe_SN} - if (probe_key['probe'] not in [p['probe'] for p in probe_list - ] and probe_key not in probe.Probe()): + probe_key = { + "probe_type": oe_probe.probe_model, + "probe": oe_probe.probe_SN, + } + if ( + probe_key["probe"] not in [p["probe"] for p in probe_list] + and probe_key not in probe.Probe() + ): probe_list.append(probe_key) - insertions.append({'probe': oe_probe.probe_SN, - 'insertion_number': probe_idx}) + insertions.append( + {"probe": oe_probe.probe_SN, "insertion_number": probe_idx} + ) else: - raise NotImplementedError('Unknown acquisition software: ' - + f'{acq_software}') + raise NotImplementedError( + "Unknown acquisition software: " + f"{acq_software}" + ) # new session/probe-insertion - session_key = {'subject': sess['subject'], - 'session_datetime': min(session_datetimes)} + session_key = { + "subject": sess["subject"], + "session_datetime": 
min(session_datetimes), + } if session_key not in session.Session(): session_list.append(session_key) root_dir = find_root_directory(get_ephys_root_data_dir(), session_dir) - session_dir_list.append({**session_key, 'session_dir': - session_dir.relative_to(root_dir).as_posix()}) - session_note_list.append({**session_key, 'session_note': - sess['session_note']}) - session_experimenter_list.append({**session_key, 'user': - sess['user']}) - probe_insertion_list.extend([{**session_key, **insertion - } for insertion in insertions]) + session_dir_list.append( + { + **session_key, + "session_dir": session_dir.relative_to(root_dir).as_posix(), + } + ) + session_note_list.append( + {**session_key, "session_note": sess["session_note"]} + ) + session_experimenter_list.append({**session_key, "user": sess["user"]}) + probe_insertion_list.extend( + [{**session_key, **insertion} for insertion in insertions] + ) session.Session.insert(session_list) session.SessionDirectory.insert(session_dir_list) session.SessionNote.insert(session_note_list) session.SessionExperimenter.insert(session_experimenter_list) if verbose: - print(f'\n---- Insert {len(session_list)} entry(s) into session.Session ----') + print(f"\n---- Insert {len(session_list)} entry(s) into session.Session ----") probe.Probe.insert(probe_list) if verbose: - print(f'\n---- Insert {len(probe_list)} entry(s) into probe.Probe ----') + print(f"\n---- Insert {len(probe_list)} entry(s) into probe.Probe ----") ephys.ProbeInsertion.insert(probe_insertion_list) if verbose: - print(f'\n---- Insert {len(probe_insertion_list)} entry(s) into ' - + 'ephys.ProbeInsertion ----') - print('\n---- Successfully completed ingest_subjects ----') + print( + f"\n---- Insert {len(probe_insertion_list)} entry(s) into " + + "ephys.ProbeInsertion ----" + ) + print("\n---- Successfully completed ingest_subjects ----") -def ingest_events(recording_csv_path='./user_data/behavior_recordings.csv', - block_csv_path='./user_data/blocks.csv', - trial_csv_path='./user_data/trials.csv', - event_csv_path='./user_data/events.csv', - skip_duplicates=True, verbose=True): +def ingest_events( + recording_csv_path="./user_data/behavior_recordings.csv", + block_csv_path="./user_data/blocks.csv", + trial_csv_path="./user_data/trials.csv", + event_csv_path="./user_data/events.csv", + skip_duplicates=True, + verbose=True, +): """ Ingest each level of experiment heirarchy for element-event: recording, block (i.e., phases of trials), trials (repeated units), events (optionally 0-duration occurances within trial). 
""" - csvs = [recording_csv_path, recording_csv_path, - block_csv_path, block_csv_path, - trial_csv_path, trial_csv_path, trial_csv_path, - trial_csv_path, - event_csv_path, event_csv_path, event_csv_path] - tables = [event.BehaviorRecording(), event.BehaviorRecording.File(), - trial.Block(), trial.Block.Attribute(), - trial.TrialType(), trial.Trial(), trial.Trial.Attribute(), - trial.BlockTrial(), - event.EventType(), event.Event(), trial.TrialEvent()] + csvs = [ + recording_csv_path, + recording_csv_path, + block_csv_path, + block_csv_path, + trial_csv_path, + trial_csv_path, + trial_csv_path, + trial_csv_path, + event_csv_path, + event_csv_path, + event_csv_path, + ] + tables = [ + event.BehaviorRecording(), + event.BehaviorRecording.File(), + trial.Block(), + trial.Block.Attribute(), + trial.TrialType(), + trial.Trial(), + trial.Trial.Attribute(), + trial.BlockTrial(), + event.EventType(), + event.Event(), + trial.TrialEvent(), + ] # Allow direct insert required because element-event has Imported that should be Manual - ingest_csv_to_table(csvs, tables, skip_duplicates=skip_duplicates, verbose=verbose, - allow_direct_insert=True) + ingest_csv_to_table( + csvs, + tables, + skip_duplicates=skip_duplicates, + verbose=verbose, + allow_direct_insert=True, + ) -def ingest_alignment(alignment_csv_path='./user_data/alignments.csv', - skip_duplicates=True, verbose=True): +def ingest_alignment( + alignment_csv_path="./user_data/alignments.csv", skip_duplicates=True, verbose=True +): csvs = [alignment_csv_path] tables = [event.AlignmentEvent()] @@ -198,7 +278,7 @@ def ingest_alignment(alignment_csv_path='./user_data/alignments.csv', ingest_csv_to_table(csvs, tables, skip_duplicates=skip_duplicates, verbose=verbose) -if __name__ == '__main__': +if __name__ == "__main__": ingest_lab() ingest_subjects() ingest_sessions() diff --git a/workflow_array_ephys/localization.py b/workflow_array_ephys/localization.py index c73ead50..79aab953 100644 --- a/workflow_array_ephys/localization.py +++ b/workflow_array_ephys/localization.py @@ -4,19 +4,29 @@ from element_electrode_localization.coordinate_framework import load_ccf_annotation from .pipeline import ephys, probe -from .paths import get_ephys_root_data_dir, get_session_directory, \ - get_electrode_localization_dir - - -if 'custom' not in dj.config: - dj.config['custom'] = {} - -db_prefix = dj.config['custom'].get('database.prefix', '') - -__all__ = ['ephys', 'probe', 'coordinate_framework', 'electrode_localization', - 'ProbeInsertion', - 'get_ephys_root_data_dir', 'get_session_directory', - 'get_electrode_localization_dir', 'load_ccf_annotation'] +from .paths import ( + get_ephys_root_data_dir, + get_session_directory, + get_electrode_localization_dir, +) + + +if "custom" not in dj.config: + dj.config["custom"] = {} + +db_prefix = dj.config["custom"].get("database.prefix", "") + +__all__ = [ + "ephys", + "probe", + "coordinate_framework", + "electrode_localization", + "ProbeInsertion", + "get_ephys_root_data_dir", + "get_session_directory", + "get_electrode_localization_dir", + "load_ccf_annotation", +] ccf_id = 0 voxel_resolution = 100 @@ -25,16 +35,20 @@ ProbeInsertion = ephys.ProbeInsertion -electrode_localization.activate(db_prefix + 'eloc', - db_prefix + 'ccf', - linking_module=__name__) +electrode_localization.activate( + db_prefix + "eloc", db_prefix + "ccf", linking_module=__name__ +) -nrrd_filepath = find_full_path(get_ephys_root_data_dir(), - f'annotation_{voxel_resolution}.nrrd') -ontology_csv_filepath = 
find_full_path(get_ephys_root_data_dir(), 'query.csv') +nrrd_filepath = find_full_path( + get_ephys_root_data_dir(), f"annotation_{voxel_resolution}.nrrd" +) +ontology_csv_filepath = find_full_path(get_ephys_root_data_dir(), "query.csv") -if not (coordinate_framework.CCF & {'ccf_id': ccf_id}): +if not (coordinate_framework.CCF & {"ccf_id": ccf_id}): coordinate_framework.load_ccf_annotation( - ccf_id=ccf_id, version_name='ccf_2017', voxel_resolution=voxel_resolution, + ccf_id=ccf_id, + version_name="ccf_2017", + voxel_resolution=voxel_resolution, nrrd_filepath=nrrd_filepath, - ontology_csv_filepath=ontology_csv_filepath) + ontology_csv_filepath=ontology_csv_filepath, + ) diff --git a/workflow_array_ephys/paths.py b/workflow_array_ephys/paths.py index fec1f38b..069ffcff 100644 --- a/workflow_array_ephys/paths.py +++ b/workflow_array_ephys/paths.py @@ -4,29 +4,36 @@ def get_ephys_root_data_dir(): - return dj.config.get('custom', {}).get('ephys_root_data_dir', None) + return dj.config.get("custom", {}).get("ephys_root_data_dir", None) def get_session_directory(session_key: dict) -> str: from .pipeline import session - session_dir = (session.SessionDirectory & session_key).fetch1('session_dir') + + session_dir = (session.SessionDirectory & session_key).fetch1("session_dir") return session_dir def get_electrode_localization_dir(probe_insertion_key: dict) -> str: from .pipeline import ephys - acq_software = (ephys.EphysRecording & probe_insertion_key).fetch1('acq_software') - - if acq_software == 'SpikeGLX': - spikeglx_meta_filepath = pathlib.Path((ephys.EphysRecording.EphysFile - & probe_insertion_key - & 'file_path LIKE "%.ap.meta"' - ).fetch1('file_path')) - probe_dir = find_full_path(get_ephys_root_data_dir(), - spikeglx_meta_filepath.parent) - elif acq_software == 'Open Ephys': - probe_path = (ephys.EphysRecording.EphysFile & probe_insertion_key - ).fetch1('file_path') + + acq_software = (ephys.EphysRecording & probe_insertion_key).fetch1("acq_software") + + if acq_software == "SpikeGLX": + spikeglx_meta_filepath = pathlib.Path( + ( + ephys.EphysRecording.EphysFile + & probe_insertion_key + & 'file_path LIKE "%.ap.meta"' + ).fetch1("file_path") + ) + probe_dir = find_full_path( + get_ephys_root_data_dir(), spikeglx_meta_filepath.parent + ) + elif acq_software == "Open Ephys": + probe_path = (ephys.EphysRecording.EphysFile & probe_insertion_key).fetch1( + "file_path" + ) probe_dir = find_full_path(get_ephys_root_data_dir(), probe_path) return probe_dir diff --git a/workflow_array_ephys/pipeline.py b/workflow_array_ephys/pipeline.py index d2c9f8a6..381d70d5 100644 --- a/workflow_array_ephys/pipeline.py +++ b/workflow_array_ephys/pipeline.py @@ -12,56 +12,73 @@ from element_lab.lab import Source, Lab, Protocol, User, Project from element_session.session_with_datetime import Session -from .paths import (get_ephys_root_data_dir, - get_session_directory, - get_electrode_localization_dir) +from .paths import ( + get_ephys_root_data_dir, + get_session_directory, + get_electrode_localization_dir, +) # session and ephys nwb exports check for these in linking_module from .export import element_lab_to_nwb_dict, subject_to_nwb, session_to_nwb -if 'custom' not in dj.config: - dj.config['custom'] = {} +if "custom" not in dj.config: + dj.config["custom"] = {} -db_prefix = dj.config['custom'].get('database.prefix', '') +db_prefix = dj.config["custom"].get("database.prefix", "") # ------------- Import the configured "ephys mode" ------------- -ephys_mode = os.getenv('EPHYS_MODE', - 
dj.config['custom'].get('ephys_mode', 'acute')) -if ephys_mode == 'acute': +ephys_mode = os.getenv("EPHYS_MODE", dj.config["custom"].get("ephys_mode", "acute")) +if ephys_mode == "acute": from element_array_ephys import ephys -elif ephys_mode == 'chronic': +elif ephys_mode == "chronic": from element_array_ephys import ephys_chronic as ephys -elif ephys_mode == 'no-curation': +elif ephys_mode == "no-curation": from element_array_ephys import ephys_no_curation as ephys else: - raise ValueError(f'Unknown ephys mode: {ephys_mode}') + raise ValueError(f"Unknown ephys mode: {ephys_mode}") # ---------------- All items in namespace for linter ----------- __all__ = [ - # schemas - 'subject', 'lab', 'session', 'trial', 'event', 'probe', 'ephys', - 'coordinate_framework', 'electrode_localization', - # tables - 'Subject', 'Source', 'Lab', 'Protocol', 'User', 'Project', 'Session', - # paths - 'get_ephys_root_data_dir', 'get_session_directory', - 'get_electrode_localization_dir', - # export - 'subject_to_nwb', 'session_to_nwb', "element_lab_to_nwb_dict" - ] + # schemas + "subject", + "lab", + "session", + "trial", + "event", + "probe", + "ephys", + "coordinate_framework", + "electrode_localization", + # tables + "Subject", + "Source", + "Lab", + "Protocol", + "User", + "Project", + "Session", + # paths + "get_ephys_root_data_dir", + "get_session_directory", + "get_electrode_localization_dir", + # export + "subject_to_nwb", + "session_to_nwb", + "element_lab_to_nwb_dict", +] # Activate "lab", "subject", "session" schema --------------------------------- -lab.activate(db_prefix + 'lab') +lab.activate(db_prefix + "lab") -subject.activate(db_prefix + 'subject', linking_module=__name__) +subject.activate(db_prefix + "subject", linking_module=__name__) Experimenter = lab.User -session.activate(db_prefix + 'session', linking_module=__name__) +session.activate(db_prefix + "session", linking_module=__name__) # Activate "event" and "trial" schema --------------------------------- @@ -71,27 +88,26 @@ # Declare table "SkullReference" for use in element-array-ephys --------------- + @lab.schema class SkullReference(dj.Lookup): definition = """ skull_reference : varchar(60) """ - contents = zip(['Bregma', 'Lambda']) + contents = zip(["Bregma", "Lambda"]) # Activate "ephys" schema ----------------------------------------------------- -ephys.activate(db_prefix + 'ephys', - db_prefix + 'probe', - linking_module=__name__) +ephys.activate(db_prefix + "ephys", db_prefix + "probe", linking_module=__name__) # Activate "electrode-localization" schema ------------------------------------ ProbeInsertion = ephys.ProbeInsertion -electrode_localization.activate(db_prefix + 'electrode_localization', - db_prefix + 'ccf', - linking_module=__name__) +electrode_localization.activate( + db_prefix + "electrode_localization", db_prefix + "ccf", linking_module=__name__ +) ccf_id = 0 # Atlas ID voxel_resolution = 100 diff --git a/workflow_array_ephys/plotting/plot_psth.py b/workflow_array_ephys/plotting/plot_psth.py index 30297dbd..18d06bfb 100644 --- a/workflow_array_ephys/plotting/plot_psth.py +++ b/workflow_array_ephys/plotting/plot_psth.py @@ -2,8 +2,9 @@ import matplotlib.pyplot as plt -def _plot_spike_raster(aligned_spikes, trial_ids=None, vlines=[0], ax=None, title='', - xlim=None): +def _plot_spike_raster( + aligned_spikes, trial_ids=None, vlines=[0], ax=None, title="", xlim=None +): if not ax: fig, ax = plt.subplots(1, 1) @@ -11,37 +12,38 @@ def _plot_spike_raster(aligned_spikes, trial_ids=None, vlines=[0], ax=None, titl if 
trial_ids is None: trial_ids = range(len(aligned_spikes)) - trial_ids = np.concatenate([[t] * len(s) - for t, s in zip(trial_ids, aligned_spikes)]).astype(int) + trial_ids = np.concatenate( + [[t] * len(s) for t, s in zip(trial_ids, aligned_spikes)] + ).astype(int) assert len(raster) == len(trial_ids) - ax.plot(raster, trial_ids, 'ro', markersize=4) + ax.plot(raster, trial_ids, "ro", markersize=4) for x in vlines: - ax.axvline(x=x, linestyle='--', color='k') + ax.axvline(x=x, linestyle="--", color="k") - ax.set_ylabel('Trial (#)') + ax.set_ylabel("Trial (#)") if xlim: ax.set_xlim(xlim) # ax.set_axis_off() ax.set_title(title) -def _plot_psth(psth, psth_edges, bin_size, vlines=[0], ax=None, title='', xlim=None): +def _plot_psth(psth, psth_edges, bin_size, vlines=[0], ax=None, title="", xlim=None): if not ax: fig, ax = plt.subplots(1, 1) ax.bar(psth_edges, psth, width=bin_size, edgecolor="black", align="edge") for x in vlines: - ax.axvline(x=x, linestyle='--', color='k') + ax.axvline(x=x, linestyle="--", color="k") - ax.set_ylabel('spikes/s') + ax.set_ylabel("spikes/s") ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) if xlim: ax.set_xlim(xlim) - ax.set_xlabel('Time (s)') + ax.set_xlabel("Time (s)") ax.set_title(title) diff --git a/workflow_array_ephys/process.py b/workflow_array_ephys/process.py index 91568b1d..88a30e7b 100644 --- a/workflow_array_ephys/process.py +++ b/workflow_array_ephys/process.py @@ -3,25 +3,27 @@ def run(display_progress=True, reserve_jobs=False, suppress_errors=False): - populate_settings = {'display_progress': display_progress, - 'reserve_jobs': reserve_jobs, - 'suppress_errors': suppress_errors} + populate_settings = { + "display_progress": display_progress, + "reserve_jobs": reserve_jobs, + "suppress_errors": suppress_errors, + } - print('\n---- Populate ephys.EphysRecording ----') + print("\n---- Populate ephys.EphysRecording ----") ephys.EphysRecording.populate(**populate_settings) - print('\n---- Populate ephys.LFP ----') + print("\n---- Populate ephys.LFP ----") ephys.LFP.populate(**populate_settings) - print('\n---- Populate ephys.Clustering ----') + print("\n---- Populate ephys.Clustering ----") ephys.Clustering.populate(**populate_settings) - print('\n---- Populate ephys.CuratedClustering ----') + print("\n---- Populate ephys.CuratedClustering ----") ephys.CuratedClustering.populate(**populate_settings) - print('\n---- Populate ephys.WaveformSet ----') + print("\n---- Populate ephys.WaveformSet ----") ephys.WaveformSet.populate(**populate_settings) -if __name__ == '__main__': +if __name__ == "__main__": run() diff --git a/workflow_array_ephys/version.py b/workflow_array_ephys/version.py index 3105c048..ee1b0bb5 100644 --- a/workflow_array_ephys/version.py +++ b/workflow_array_ephys/version.py @@ -2,4 +2,4 @@ Package metadata Update the Docker image tag in `docker-compose.yaml` to match """ -__version__ = '0.1.0' +__version__ = "0.1.0"
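
A minimal usage sketch of the entry points touched above, for orientation only: it assumes `dj.config` already carries database credentials plus `custom.ephys_root_data_dir` and `custom.ephys_mode`, and that the default CSVs under `./user_data/` exist; the call signatures are the ones defined in `workflow_array_ephys/ingest.py` and `workflow_array_ephys/process.py` in the hunks above, not a new API.

    # Sketch, assuming dj.config is configured (credentials, custom.ephys_root_data_dir,
    # custom.ephys_mode) and the default CSVs under ./user_data/ are present.
    from workflow_array_ephys.ingest import ingest_lab, ingest_subjects, ingest_sessions
    from workflow_array_ephys import process

    # CSV-driven ingestion into the manual tables; default csv paths point at ./user_data/.
    ingest_lab()
    ingest_subjects()
    ingest_sessions()

    # Auto-populate the ephys pipeline in order:
    # EphysRecording -> LFP -> Clustering -> CuratedClustering -> WaveformSet.
    process.run(display_progress=True, reserve_jobs=False, suppress_errors=False)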