Merge pull request #6 from catalystneuro/bug_fixes
Fix major bugs/typos
CodyCBakerPhD authored Oct 27, 2023
2 parents e45bf47 + 2229d1b commit e137407
Showing 6 changed files with 107 additions and 64 deletions.
17 changes: 8 additions & 9 deletions README.md
@@ -3,9 +3,7 @@ NWB conversion scripts for Jazayeri lab data to the [Neurodata Without Borders](


## Installation

## Installation from Github
Another option is to install the package directly from Github. This option has the advantage that the source code can be modified if you need to amend some of the code we originally provided to adapt to future experimental differences. To install the conversion from GitHub you will need to use `git` ([installation instructions](https://github.com/git-guides/install-git)). We also recommend the installation of `conda` ([installation instructions](https://docs.conda.io/en/latest/miniconda.html)) as it contains all the required machinery in a single and simple install.
The package can be installed from this GitHub repo, which has the advantage that the source code can be modified if you need to amend some of the code we originally provided to adapt to future experimental differences. To install the conversion from GitHub you will need to use `git` ([installation instructions](https://github.com/git-guides/install-git)). The package also requires Python 3.9 or 3.10. We also recommend the installation of `conda` ([installation instructions](https://docs.conda.io/en/latest/miniconda.html)) as it contains all the required machinery in a single and simple install.

From a terminal (note that conda should install one in your system) you can do the following:
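The command block that follows is collapsed in this diff view. As a hedged sketch of what it presumably contains (the clone URL comes from the pip instructions below, the environment file `make_env.yml` is part of this commit, and the environment name `jazayeri-lab-to-nwb-env` appears in the hunk context; the exact commands are otherwise assumed):

```
git clone https://github.com/catalystneuro/jazayeri-lab-to-nwb
cd jazayeri-lab-to-nwb
conda env create --file make_env.yml
conda activate jazayeri-lab-to-nwb-env
```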

@@ -18,7 +16,7 @@ conda activate jazayeri-lab-to-nwb-env

This creates a [conda environment](https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/environments.html) which isolates the conversion code from your system libraries. We recommend that you run all your conversion related tasks and analysis from the created environment in order to minimize issues related to package dependencies.

Alternatively, if you want to avoid conda altogether (for example if you use another virtual environment tool) you can install the repository with the following commands using only pip:
Alternatively, if you have Python 3.9 or 3.10 on your machine and you want to avoid conda altogether (for example if you use another virtual environment tool) you can install the repository with the following commands using only pip:

```
git clone https://github.com/catalystneuro/jazayeri-lab-to-nwb
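# (The rest of this install block is collapsed in the diff view. A hedged
# sketch of the likely remaining commands; the directory name and the
# editable pip install are assumptions, not confirmed by this diff.)
cd jazayeri-lab-to-nwb
pip install -e .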
@@ -41,8 +39,7 @@ Each conversion is organized in a directory of its own in the `src` directory:
├── setup.py
└── src
├── jazayeri_lab_to_nwb
│ ├── conversion_directory_1
│ └── watters
│ ├── watters
│ ├── wattersbehaviorinterface.py
│ ├── watters_convert_session.py
│ ├── watters_metadata.yml
@@ -51,7 +48,8 @@ Each conversion is organized in a directory of its own in the `src` directory:
│ ├── watters_notes.md

│ └── __init__.py
│ ├── conversion_directory_b

│ └── another_conversion

└── __init__.py

@@ -75,14 +73,15 @@ pip install -r src/jazayeri_lab_to_nwb/watters/watters_requirements.txt

You can run a specific conversion with the following command:
```
python src/jazayeri_lab_to_nwb/watters/watters_conversion_script.py
python src/jazayeri_lab_to_nwb/watters/watters_convert_session.py
```

### Watters working memory task data
The conversion function for this experiment, `session_to_nwb`, is found in `src/watters/watters_conversion_script.py`. The function takes three arguments:
The conversion function for this experiment, `session_to_nwb`, is found in `src/watters/watters_convert_session.py`. The function takes three arguments:
* `data_dir_path` points to the root directory for the data for a given session.
* `output_dir_path` points to where the converted data should be saved.
* `stub_test` indicates whether only a small portion of the data should be saved (mainly used by us for testing purposes).
* `overwrite` indicates whether existing NWB files at the auto-generated output file paths should be overwritten.

The function can be imported into a separate script and run, or you can run the file directly and specify the arguments in the `if __name__ == "__main__"` block at the bottom.
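As an editorial usage sketch (not part of this diff; the import path follows the package layout above, the session and output paths are placeholders, and note that the updated script names the first argument `data_dir` rather than `data_dir_path`):

```
from pathlib import Path

from jazayeri_lab_to_nwb.watters.watters_convert_session import session_to_nwb

# Convert a single session; replace the paths with your own data locations.
session_to_nwb(
    data_dir=Path("/path/to/monkey0/2022-06-01"),      # session root directory
    output_dir_path=Path("/path/to/conversion_nwb"),   # where NWB files are written
    stub_test=False,  # True writes only a small portion of the data (for testing)
    overwrite=True,   # overwrite existing NWB files at the auto-generated paths
)
```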

2 changes: 1 addition & 1 deletion make_env.yml
@@ -3,7 +3,7 @@ channels:
- conda-forge
- defaults
dependencies:
- python>=3.9
- python>=3.9,<3.11
- pip
- pip:
- -e . # This calls the setup and therefore requirements minimal
2 changes: 1 addition & 1 deletion setup.py
@@ -21,6 +21,6 @@
packages=find_packages(where="src"),
package_dir={"": "src"},
include_package_data=True,
python_requires=">=3.8",
python_requires=">=3.8,<3.11",
install_requires=install_requires,
)
142 changes: 93 additions & 49 deletions src/jazayeri_lab_to_nwb/watters/watters_convert_session.py
@@ -1,28 +1,58 @@
"""Primary script to run to convert an entire session of data using the NWBConverter."""
from pathlib import Path
from typing import Union

import datetime
import glob
import json
from zoneinfo import ZoneInfo
import logging
from pathlib import Path
from typing import Union
from uuid import uuid4
from zoneinfo import ZoneInfo

from neuroconv.utils import load_dict_from_file, dict_deep_update

from jazayeri_lab_to_nwb.watters import WattersNWBConverter

# Set logger level so that info messages are displayed in the console
logging.getLogger().setLevel(logging.INFO)


def session_to_nwb(data_dir_path: Union[str, Path], output_dir_path: Union[str, Path], stub_test: bool = False):
def _get_single_file(directory, suffix=""):
"""Get path to a file in given directory with given suffix.
data_dir_path = Path(data_dir_path)
Raises error if not exactly one satisfying file.
"""
files = list(glob.glob(str(directory / f"*{suffix}")))
if len(files) == 0:
raise ValueError(f"No {suffix} files found in {directory}")
if len(files) > 1:
raise ValueError(f"Multiple {suffix} files found in {directory}")
return files[0]


def session_to_nwb(
data_dir: Union[str, Path],
output_dir_path: Union[str, Path],
stub_test: bool = False,
overwrite: bool = False,
):

logging.info("")
logging.info(f"data_dir = {data_dir}")
logging.info(f"output_dir_path = {output_dir_path}")
logging.info(f"stub_test = {stub_test}")

data_dir = Path(data_dir)
output_dir_path = Path(output_dir_path)
if stub_test:
output_dir_path = output_dir_path / "nwb_stub"
output_dir_path.mkdir(parents=True, exist_ok=True)

session_id = f"ses-{data_dir_path.name}"
session_id = f"ses-{data_dir.name}"
raw_nwbfile_path = output_dir_path / f"{session_id}_raw.nwb"
processed_nwbfile_path = output_dir_path / f"{session_id}_processed.nwb"
logging.info(f"raw_nwbfile_path = {raw_nwbfile_path}")
logging.info(f"processed_nwbfile_path = {processed_nwbfile_path}")

raw_source_data = dict()
raw_conversion_options = dict()
@@ -31,16 +61,18 @@ def session_to_nwb(data_dir_path: Union[str, Path], output_dir_path: Union[str,

for probe_num in range(2):
# Add V-Probe Recording
if not (data_dir_path / "raw_data" / f"v_probe_{probe_num}").exists():
probe_data_dir = data_dir / "raw_data" / f"v_probe_{probe_num}"
if not probe_data_dir.exists():
continue
recording_files = list(glob.glob(str(data_dir_path / "raw_data" / f"v_probe_{probe_num}" / "*.dat")))
assert len(recording_files) > 0, f"No .dat files found in {data_dir_path}"
assert len(recording_files) == 1, f"Multiple .dat files found in {data_dir_path}"
logging.info(f"\nAdding V-probe {probe_num} recording")

logging.info(" Raw data")
recording_file = _get_single_file(probe_data_dir, suffix=".dat")
recording_source_data = {
f"RecordingVP{probe_num}": dict(
file_path=str(recording_files[0]),
probe_metadata_file=str(data_dir_path / "data_open_source" / "probes.metadata.json"),
probe_key=f"probe{(probe_num+1):02d}",
file_path=recording_file,
probe_metadata_file=str(data_dir / "data_open_source" / "probes.metadata.json"),
probe_key=f"probe{(probe_num + 1):02d}",
probe_name=f"vprobe{probe_num}",
es_key=f"ElectricalSeriesVP{probe_num}",
)
@@ -53,85 +85,87 @@ def session_to_nwb(data_dir_path: Union[str, Path], output_dir_path: Union[str,
)

# Add V-Probe Sorting
logging.info(" Spike sorted data")
processed_source_data.update(
{
f"SortingVP{probe_num}": dict(
folder_path=str(data_dir_path / "spike_sorting_raw" / f"v_probe_{probe_num}"),
folder_path=str(data_dir / "spike_sorting_raw" / f"v_probe_{probe_num}"),
keep_good_only=False,
)
}
)
processed_conversion_options.update({f"SortingVP{probe_num}": dict(stub_test=stub_test, write_as="processing")})

# Add Recording
recording_files = list(glob.glob(str(data_dir_path / "raw_data" / "spikeglx" / "*" / "*" / "*.ap.bin")))
assert len(recording_files) > 0, f"No .ap.bin files found in {data_dir_path}"
assert len(recording_files) == 1, f"Multiple .ap.bin files found in {data_dir_path}"
raw_source_data.update(dict(RecordingNP=dict(file_path=str(recording_files[0]))))
processed_source_data.update(dict(RecordingNP=dict(file_path=str(recording_files[0]))))
# Add SpikeGLX Recording
logging.info("Adding SpikeGLX recordings")
logging.info(" AP data")
probe_data_dir = data_dir / "raw_data" / "spikeglx" / "*" / "*"
ap_file = _get_single_file(probe_data_dir, suffix=".ap.bin")
raw_source_data.update(dict(RecordingNP=dict(file_path=ap_file)))
processed_source_data.update(dict(RecordingNP=dict(file_path=ap_file)))
raw_conversion_options.update(dict(RecordingNP=dict(stub_test=stub_test)))
processed_conversion_options.update(dict(RecordingNP=dict(stub_test=stub_test, write_electrical_series=False)))

# Add LFP
lfp_files = list(glob.glob(str(data_dir_path / "raw_data" / "spikeglx" / "*" / "*" / "*.lf.bin")))
assert len(lfp_files) > 0, f"No .lf.bin files found in {data_dir_path}"
assert len(lfp_files) == 1, f"Multiple .lf.bin files found in {data_dir_path}"
raw_source_data.update(dict(LF=dict(file_path=str(lfp_files[0]))))
processed_source_data.update(dict(LF=dict(file_path=str(lfp_files[0]))))
logging.info(" LFP data")
lfp_file = _get_single_file(probe_data_dir, suffix=".lf.bin")
raw_source_data.update(dict(LF=dict(file_path=lfp_file)))
processed_source_data.update(dict(LF=dict(file_path=lfp_file)))
raw_conversion_options.update(dict(LF=dict(stub_test=stub_test)))
processed_conversion_options.update(dict(LF=dict(stub_test=stub_test, write_electrical_series=False)))

# Add Sorting
logging.info(" Spike sorted data")
processed_source_data.update(
dict(
SortingNP=dict(
folder_path=str(data_dir_path / "spike_sorting_raw" / "np"),
folder_path=str(data_dir / "spike_sorting_raw" / "np"),
keep_good_only=False,
)
)
)
processed_conversion_options.update(dict(SortingNP=dict(stub_test=stub_test, write_as="processing")))

# Add Behavior
processed_source_data.update(
dict(EyePosition=dict(folder_path=str(data_dir_path / "data_open_source" / "behavior")))
)
logging.info("Adding behavior")
behavior_path = str(data_dir / "data_open_source" / "behavior")
processed_source_data.update(dict(EyePosition=dict(folder_path=behavior_path)))
processed_conversion_options.update(dict(EyePosition=dict()))

processed_source_data.update(dict(PupilSize=dict(folder_path=str(data_dir_path / "data_open_source" / "behavior"))))
processed_source_data.update(dict(PupilSize=dict(folder_path=behavior_path)))
processed_conversion_options.update(dict(PupilSize=dict()))

# Add Trials
processed_source_data.update(dict(Trials=dict(folder_path=str(data_dir_path / "data_open_source"))))
logging.info("Adding task data")
processed_source_data.update(dict(Trials=dict(folder_path=str(data_dir / "data_open_source"))))
processed_conversion_options.update(dict(Trials=dict()))

processed_converter = WattersNWBConverter(
source_data=processed_source_data, sync_dir=str(data_dir_path / "sync_pulses")
)
processed_converter = WattersNWBConverter(source_data=processed_source_data, sync_dir=str(data_dir / "sync_pulses"))

# Add datetime to conversion
metadata = processed_converter.get_metadata() # use processed b/c it has everything
metadata = processed_converter.get_metadata()
metadata["NWBFile"]["session_id"] = session_id

# Subject name
if "monkey0" in str(data_dir_path):
if "monkey0" in str(data_dir):
metadata["Subject"]["subject_id"] = "Perle"
elif "monkey1" in str(data_dir_path):
elif "monkey1" in str(data_dir):
metadata["Subject"]["subject_id"] = "Elgar"

# Ecephys
probe_metadata_file = data_dir_path / "data_open_source" / "probes.metadata.json"
probe_metadata_file = data_dir / "data_open_source" / "probes.metadata.json"
with open(probe_metadata_file, "r") as f:
probe_metadata = json.load(f)
neuropixel_metadata = [entry for entry in probe_metadata if entry["label"] == "probe00"][0]
for entry in metadata["Ecephys"]["ElectrodeGroup"]:
if entry["device"] == "Neuropixel-Imec":
# TODO: uncomment when fixed in pynwb
# entry.update(dict(position=[(
# neuropixel_metadata["coordinates"][0],
# neuropixel_metadata["coordinates"][1],
# neuropixel_metadata["depth_from_surface"],
# )]
pass # TODO: uncomment when fixed in pynwb
logging.warning("\n\n PROBE COORDINATES NOT IMPLEMENTED\n\n")

# Update default metadata with the editable metadata in the corresponding yaml file
editable_metadata_path = Path(__file__).parent / "watters_metadata.yaml"
@@ -141,35 +175,45 @@ def session_to_nwb(data_dir_path: Union[str, Path], output_dir_path: Union[str,
# check if session_start_time was found/set
if "session_start_time" not in metadata["NWBFile"]:
try:
date = datetime.datetime.strptime(data_dir_path.name, "%Y-%m-%d").replace(tzinfo=ZoneInfo("US/Eastern"))
date = datetime.datetime.strptime(data_dir.name, "%Y-%m-%d")
date = date.replace(tzinfo=ZoneInfo("US/Eastern"))
except:
raise AssertionError(
"Session start time was not auto-detected. Please provide it in `watters_metadata.yaml`"
raise ValueError(
"Session start time was not auto-detected. Please provide it " "in `watters_metadata.yaml`"
)
metadata["NWBFile"]["session_start_time"] = date

# Run conversion
logging.info("Running processed conversion")
processed_converter.run_conversion(
metadata=metadata, nwbfile_path=processed_nwbfile_path, conversion_options=processed_conversion_options
metadata=metadata,
nwbfile_path=processed_nwbfile_path,
conversion_options=processed_conversion_options,
overwrite=overwrite,
)

logging.info("Running raw data conversion")
metadata["NWBFile"]["identifier"] = str(uuid4())
raw_converter = WattersNWBConverter(source_data=raw_source_data, sync_dir=str(data_dir_path / "sync_pulses"))
raw_converter = WattersNWBConverter(source_data=raw_source_data, sync_dir=str(data_dir / "sync_pulses"))
raw_converter.run_conversion(
metadata=metadata, nwbfile_path=raw_nwbfile_path, conversion_options=raw_conversion_options
metadata=metadata,
nwbfile_path=raw_nwbfile_path,
conversion_options=raw_conversion_options,
overwrite=overwrite,
)


if __name__ == "__main__":

# Parameters for conversion
data_dir_path = Path("/shared/catalystneuro/JazLab/monkey0/2022-06-01/")
# data_dir_path = Path("/shared/catalystneuro/JazLab/monkey1/2022-06-05/")
output_dir_path = Path("~/conversion_nwb/jazayeri-lab-to-nwb/watters_perle_combined/").expanduser()
data_dir = Path("/om2/user/nwatters/catalystneuro/initial_data_transfer/" "monkey0/2022-06-01/")
output_dir_path = Path("/om/user/nwatters/nwb_data/watters_perle_combined/")
stub_test = True
overwrite = True

session_to_nwb(
data_dir_path=data_dir_path,
data_dir=data_dir,
output_dir_path=output_dir_path,
stub_test=stub_test,
overwrite=overwrite,
)
3 changes: 0 additions & 3 deletions src/jazayeri_lab_to_nwb/watters/watters_requirements.txt
@@ -1,3 +0,0 @@
nwb-conversion-tools==0.11.1 # Example of specific pinned dependency
some-extra-package==1.11.3 # Example of another extra package that's necessary for the current conversion
roiextractors @ git+https://github.com/catalystneuro/roiextractors.git@8db5f9cb3a7ee5efee49b7fd0b694c7a8105519a # Github pinned dependency
5 changes: 4 additions & 1 deletion src/jazayeri_lab_to_nwb/watters/wattersnwbconverter.py
@@ -1,5 +1,6 @@
"""Primary NWBConverter class for this dataset."""
import json
import logging
import numpy as np
from typing import Optional
from pathlib import Path
@@ -18,7 +19,7 @@
from spikeinterface.core.waveform_tools import has_exceeding_spikes
from spikeinterface.curation import remove_excess_spikes

from jazayeri_lab_to_nwb.watters import (
from . import (
WattersDatRecordingInterface,
WattersEyePositionInterface,
WattersPupilSizeInterface,
@@ -62,6 +63,8 @@ def __init__(
unit_name_start += np.max(unit_ids) + 1

def temporally_align_data_interfaces(self):
logging.info("Temporally aligning data interfaces")

if self.sync_dir is None:
return
sync_dir = Path(self.sync_dir)
