
Commit

Merge branch 'main' into fix_dev_tests
pauladkisson authored Dec 13, 2024
2 parents 64c00ba + 43477de commit 06615cb
Showing 60 changed files with 1,698 additions and 437 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/deploy-tests.yml
@@ -69,6 +69,9 @@ jobs:
if: ${{ needs.assess-file-changes.outputs.SOURCE_CHANGED == 'true' }}
uses: ./.github/workflows/live-service-testing.yml
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
S3_GIN_BUCKET: ${{ secrets.S3_GIN_BUCKET }}
DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }}
with: # Ternary operator: condition && value_if_true || value_if_false
python-versions: ${{ github.event.pull_request.draft == true && '["3.9"]' || needs.load_python_and_os_versions.outputs.ALL_PYTHON_VERSIONS }}
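The `with:` line above leans on GitHub Actions' `&& ... || ...` expression idiom to emulate a ternary operator. For intuition, a minimal Python sketch of the same idiom and its one pitfall; the version lists here are placeholders, not values from this workflow:

```python
# Emulating a ternary with `and`/`or`, mirroring the expression idiom used above:
# condition && value_if_true || value_if_false.
is_draft = True
python_versions = is_draft and '["3.9"]' or '["3.9", "3.12"]'
print(python_versions)  # -> '["3.9"]'

# Pitfall: if the "true" value is falsy (e.g., an empty string), the `or` branch
# wins unexpectedly; an explicit conditional expression avoids that.
python_versions = '["3.9"]' if is_draft else '["3.9", "3.12"]'
```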
16 changes: 16 additions & 0 deletions .github/workflows/live-service-testing.yml
@@ -13,6 +13,12 @@ on:
type: string

secrets:
AWS_ACCESS_KEY_ID:
required: true
AWS_SECRET_ACCESS_KEY:
required: true
S3_GIN_BUCKET:
required: true
DANDI_API_KEY:
required: true

@@ -45,7 +51,17 @@ jobs:
- name: Install full requirements
run: pip install .[test,full]

- name: Prepare data for tests
uses: ./.github/actions/load-data
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
s3-gin-bucket: ${{ secrets.S3_GIN_BUCKET }}
os: ${{ matrix.os }}

- name: Run subset of tests that use DANDI live services
run: pytest -rsx -n auto tests/test_minimal/test_tools/dandi_transfer_tools.py
- name: Run subset of tests that use DANDI live services with YAML
run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py
- name: Run subset of tests that use Globus live services
run: pytest -rsx -n auto tests/test_minimal/test_tools/globus_transfer_tools.py
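For reference, the DANDI live-service subset can also be run outside CI. A rough local equivalent of the step above, assuming the `[test,full]` requirements are installed, `DANDI_API_KEY` is exported, and `pytest-xdist` provides `-n auto`:

```python
# Local equivalent of the CI step above (sketch only).
import pytest

exit_code = pytest.main(
    ["-rsx", "-n", "auto", "tests/test_minimal/test_tools/dandi_transfer_tools.py"]
)
raise SystemExit(exit_code)
```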
46 changes: 46 additions & 0 deletions .github/workflows/neuroconv_deployment_aws_tests.yml
@@ -0,0 +1,46 @@
name: NeuroConv Deployment AWS Tests
on:
schedule:
- cron: "0 16 * * 3" # Weekly at noon on Wednesday
workflow_dispatch:

concurrency: # Cancel previous workflows on the same pull request
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
RCLONE_DRIVE_ACCESS_TOKEN: ${{ secrets.RCLONE_DRIVE_ACCESS_TOKEN }}
RCLONE_DRIVE_REFRESH_TOKEN: ${{ secrets.RCLONE_DRIVE_REFRESH_TOKEN }}
RCLONE_EXPIRY_TOKEN: ${{ secrets.RCLONE_EXPIRY_TOKEN }}
DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }}

jobs:
run:
name: ${{ matrix.os }} Python ${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
python-version: ["3.12"]
os: [ubuntu-latest]
steps:
- uses: actions/checkout@v4
- run: git fetch --prune --unshallow --tags
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Global Setup
run: |
python -m pip install -U pip # Official recommended way
git config --global user.email "[email protected]"
git config --global user.name "CI Almighty"
- name: Install AWS requirements
run: pip install .[aws,test]

- name: Run NeuroConv Deployment on AWS tests
run: pytest -rsx -n auto tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py
30 changes: 19 additions & 11 deletions CHANGELOG.md
@@ -1,35 +1,43 @@
# Upcoming

## Features
* Added the `rclone_transfer_batch_job` helper function for executing Rclone data transfers in AWS Batch jobs. [PR #1085](https://github.com/catalystneuro/neuroconv/pull/1085)



## v0.6.4
# v0.6.6 (Upcoming)

## Deprecations
* Completely removed compression settings from most places [PR #1126](https://github.com/catalystneuro/neuroconv/pull/1126)
* Removed use of `jsonschema.RefResolver` as it will be deprecated from the jsonschema library [PR #1133](https://github.com/catalystneuro/neuroconv/pull/1133)
* Completely removed compression settings from most places[PR #1126](https://github.com/catalystneuro/neuroconv/pull/1126)

## Bug Fixes
* `datetime` objects can now be validated as conversion options [#1139](https://github.com/catalystneuro/neuroconv/pull/1126)
* Make `NWBMetaDataEncoder` public again [PR #1142](https://github.com/catalystneuro/neuroconv/pull/1142)
* Fix a bug where data in `DeepLabCutInterface` failed to write when `ndx-pose` was not imported. [#1144](https://github.com/catalystneuro/neuroconv/pull/1144)
* `SpikeGLXConverterPipe` converter now accepts multi-probe structures with multi-trigger and does not assume a specific folder structure [#1150](https://github.com/catalystneuro/neuroconv/pull/1150)
* `SpikeGLXNIDQInterface` is no longer written as an ElectricalSeries [#1152](https://github.com/catalystneuro/neuroconv/pull/1152)


## Features
* Propagate the `unit_electrode_indices` argument from the spikeinterface tools to `BaseSortingExtractorInterface`. This allows users to map units to the electrode table when adding sorting data [PR #1124](https://github.com/catalystneuro/neuroconv/pull/1124)
* Imaging interfaces have a new conversion option `always_write_timestamps` that can be used to force writing timestamps even if neuroconv's heuristics indicate a regular sampling rate [PR #1125](https://github.com/catalystneuro/neuroconv/pull/1125)
* Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140)
* `SpikeGLXRecordingInterface` now also accepts `folder_path` making its behavior equivalent to SpikeInterface [#1150](https://github.com/catalystneuro/neuroconv/pull/1150)
* Added the `rclone_transfer_batch_job` helper function for executing Rclone data transfers in AWS Batch jobs. [PR #1085](https://github.com/catalystneuro/neuroconv/pull/1085)
* Added the `deploy_neuroconv_batch_job` helper function for deploying NeuroConv AWS Batch jobs. [PR #1086](https://github.com/catalystneuro/neuroconv/pull/1086)
* YAML specification files now accept an outer keyword `upload_to_dandiset="< six-digit ID >"` to automatically upload the produced NWB files to the DANDI archive [PR #1089](https://github.com/catalystneuro/neuroconv/pull/1089)
* `SpikeGLXNIDQInterface` now handles digital demuxed channels (`XD0`) [#1152](https://github.com/catalystneuro/neuroconv/pull/1152)




## Improvements
* Use mixin tests for ecephys mocks [PR #1136](https://github.com/catalystneuro/neuroconv/pull/1136)
* Use pytest format for DANDI tests to avoid Windows permission errors on teardown [PR #1151](https://github.com/catalystneuro/neuroconv/pull/1151)
* Added many docstrings for public functions [PR #1063](https://github.com/catalystneuro/neuroconv/pull/1063)

# v0.6.5 (November 1, 2024)

## Deprecations

## Bug Fixes
* Fixed format-wise installation from PyPI [PR #1118](https://github.com/catalystneuro/neuroconv/pull/1118)
* Fixed dailies [PR #1113](https://github.com/catalystneuro/neuroconv/pull/1113)

## Deprecations

## Features
* Using in-house `GenericDataChunkIterator` [PR #1068](https://github.com/catalystneuro/neuroconv/pull/1068)
* Data interfaces now perform source (argument inputs) validation with the json schema [PR #1020](https://github.com/catalystneuro/neuroconv/pull/1020)
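As a small illustration of the `folder_path` bullet above (PR #1150), a hedged sketch of the now SpikeInterface-equivalent call; the path and stream id are placeholders, not values from this diff:

```python
from neuroconv.datainterfaces import SpikeGLXRecordingInterface

# PR #1150: the interface now also accepts a folder_path plus a stream id,
# matching SpikeInterface's behavior. Replace the path with a real SpikeGLX folder.
interface = SpikeGLXRecordingInterface(
    folder_path="ECEPHY_DATA_PATH/spikeglx/Noise4Sam_g0",
    stream_id="imec0.ap",
)
metadata = interface.get_metadata()
```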
2 changes: 2 additions & 0 deletions docs/api/utils.rst
@@ -8,6 +8,8 @@ Dictionaries
JSON Schema
-----------
.. automodule:: neuroconv.utils.json_schema
:members:
:exclude-members: NWBMetaDataEncoder

Common Reused Types
-------------------
2 changes: 1 addition & 1 deletion docs/conversion_examples_gallery/recording/spikeglx.rst
@@ -24,7 +24,7 @@ We can easily convert all data stored in the native SpikeGLX folder structure to
>>>
>>> folder_path = f"{ECEPHY_DATA_PATH}/spikeglx/Noise4Sam_g0"
>>> converter = SpikeGLXConverterPipe(folder_path=folder_path)
>>>
Source data is valid!
>>> # Extract what metadata we can from the source files
>>> metadata = converter.get_metadata()
>>> # For data provenance we add the time zone information to the conversion
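A hedged continuation of the gallery snippet above (not part of this diff), showing the time-zone step the comment refers to and the final write; the time zone and output path are placeholders:

```python
from zoneinfo import ZoneInfo

# Add time zone information to the session start time for data provenance,
# then write the NWB file. `converter` and `metadata` come from the snippet above.
session_start_time = metadata["NWBFile"]["session_start_time"]
metadata["NWBFile"]["session_start_time"] = session_start_time.replace(tzinfo=ZoneInfo("US/Pacific"))
converter.run_conversion(nwbfile_path="spikeglx_session.nwb", metadata=metadata)
```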
5 changes: 3 additions & 2 deletions pyproject.toml
@@ -50,7 +50,8 @@ dependencies = [
"parse>=1.20.0",
"click",
"docstring-parser",
"packaging" # Issue 903
"packaging", # Issue 903
"referencing",
]


@@ -355,7 +356,7 @@ doctest_optionflags = "ELLIPSIS"

[tool.black]
line-length = 120
target-version = ['py38', 'py39', 'py310']
target-version = ['py39', 'py310']
include = '\.pyi?$'
extend-exclude = '''
/(
12 changes: 9 additions & 3 deletions src/neuroconv/basedatainterface.py
@@ -19,12 +19,11 @@
)
from .tools.nwb_helpers._metadata_and_file_helpers import _resolve_backend
from .utils import (
_NWBMetaDataEncoder,
get_json_schema_from_method_signature,
load_dict_from_file,
)
from .utils.dict import DeepDict
from .utils.json_schema import _NWBSourceDataEncoder
from .utils.json_schema import _NWBMetaDataEncoder, _NWBSourceDataEncoder


class BaseDataInterface(ABC):
@@ -37,7 +36,14 @@ class BaseDataInterface(ABC):

@classmethod
def get_source_schema(cls) -> dict:
"""Infer the JSON schema for the source_data from the method signature (annotation typing)."""
"""
Infer the JSON schema for the source_data from the method signature (annotation typing).

Returns
-------
dict
The JSON schema for the source_data.
"""
return get_json_schema_from_method_signature(cls, exclude=["source_data"])

@classmethod
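A minimal sketch of what the newly documented return value looks like in practice; the interface choice is arbitrary and the inspected keys are indicative of a JSON schema dict, not an exhaustive listing:

```python
from neuroconv.datainterfaces import SpikeGLXRecordingInterface

# get_source_schema() returns a JSON schema (a dict) describing the accepted
# source_data arguments for the interface.
source_schema = SpikeGLXRecordingInterface.get_source_schema()
print(sorted(source_schema["properties"]))   # accepted constructor arguments
print(source_schema.get("required", []))     # which of them are mandatory
```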
@@ -111,6 +111,28 @@ def add_to_nwbfile(
starting_frames_labeled_videos: Optional[list[int]] = None,
stub_test: bool = False,
):
"""
Add behavior and pose estimation data, including original and labeled videos, to the specified NWBFile.

Parameters
----------
nwbfile : NWBFile
The NWBFile object to which the data will be added.
metadata : dict
Metadata dictionary containing information about the behavior and videos.
reference_frame : str, optional
Description of the reference frame for pose estimation, by default None.
confidence_definition : str, optional
Definition for the confidence levels in pose estimation, by default None.
external_mode : bool, optional
If True, the videos will be referenced externally rather than embedded within the NWB file, by default True.
starting_frames_original_videos : list of int, optional
List of starting frames for the original videos, by default None.
starting_frames_labeled_videos : list of int, optional
List of starting frames for the labeled videos, by default None.
stub_test : bool, optional
If True, only a subset of the data will be added for testing purposes, by default False.
"""
original_video_interface = self.data_interface_objects["OriginalVideo"]

original_video_metadata = next(
@@ -172,6 +194,33 @@ def run_conversion(
starting_frames_labeled_videos: Optional[list] = None,
stub_test: bool = False,
) -> None:
"""
Run the full conversion process, adding behavior, video, and pose estimation data to an NWB file.

Parameters
----------
nwbfile_path : FilePath, optional
The file path where the NWB file will be saved. If None, the file is handled in memory.
nwbfile : NWBFile, optional
An in-memory NWBFile object. If None, a new NWBFile object will be created.
metadata : dict, optional
Metadata dictionary for describing the NWB file contents. If None, it is auto-generated.
overwrite : bool, optional
If True, overwrites the NWB file at `nwbfile_path` if it exists. If False, appends to the file, by default False.
reference_frame : str, optional
Description of the reference frame for pose estimation, by default None.
confidence_definition : str, optional
Definition for confidence levels in pose estimation, by default None.
external_mode : bool, optional
If True, the videos will be referenced externally rather than embedded within the NWB file, by default True.
starting_frames_original_videos : list of int, optional
List of starting frames for the original videos, by default None.
starting_frames_labeled_videos : list of int, optional
List of starting frames for the labeled videos, by default None.
stub_test : bool, optional
If True, only a subset of the data will be added for testing purposes, by default False.
"""
if metadata is None:
metadata = self.get_metadata()

@@ -187,6 +187,7 @@ def add_to_nwbfile(
nwbfile: NWBFile,
metadata: dict,
) -> None:

ndx_events = get_package(package_name="ndx_events", installation_instructions="pip install ndx-events")
medpc_name_to_info_dict = metadata["MedPC"].get("medpc_name_to_info_dict", None)
assert medpc_name_to_info_dict is not None, "medpc_name_to_info_dict must be provided in metadata"
@@ -8,7 +8,8 @@

from .nvt_utils import read_data, read_header
from ....basetemporalalignmentinterface import BaseTemporalAlignmentInterface
from ....utils import DeepDict, _NWBMetaDataEncoder, get_base_schema
from ....utils import DeepDict, get_base_schema
from ....utils.json_schema import _NWBMetaDataEncoder
from ....utils.path import infer_path


@@ -220,6 +220,19 @@ def set_aligned_segment_starting_times(self, aligned_segment_starting_times: lis
sorting_segment._t_start = aligned_segment_starting_time

def subset_sorting(self):
"""
Generate a subset of the sorting extractor based on spike timing data.

This method identifies the earliest spike time across all units in the sorting extractor and creates a
subset of the sorting data up to 110% of the earliest spike time. If the sorting extractor is associated
with a recording, the subset is further limited by the total number of samples in the recording.

Returns
-------
SortingExtractor
A new `SortingExtractor` object representing the subset of the original sorting data,
sliced from the start frame to the calculated end frame.
"""
max_min_spike_time = max(
[
min(x)
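A rough functional sketch of the rule described in the docstring above, assuming a SpikeInterface `SortingExtractor` (and optionally its paired recording); it mirrors the max-of-mins computation visible in the diff, not the exact implementation:

```python
def subset_sorting_sketch(sorting, recording=None):
    """Slice the sorting up to 110% of the latest first-spike frame across units."""
    first_spike_frames = [
        min(sorting.get_unit_spike_train(unit_id)) for unit_id in sorting.unit_ids
    ]
    end_frame = int(1.1 * max(first_spike_frames))
    if recording is not None:
        # Never slice past the end of the associated recording.
        end_frame = min(end_frame, recording.get_num_samples())
    return sorting.frame_slice(start_frame=0, end_frame=end_frame)
```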
@@ -518,6 +518,33 @@ def __init__(self, file_path: FilePath, verbose: bool = True):
)

def generate_recording_with_channel_metadata(self):
"""
Generate a dummy recording extractor with channel metadata from session data.

This method reads session data from a `.session.mat` file (if available) and generates a dummy recording
extractor. The recording extractor is then populated with channel metadata extracted from the session file.

Returns
-------
NumpyRecording
A `NumpyRecording` object representing the dummy recording extractor, containing the channel metadata.

Notes
-----
- The method reads the `.session.mat` file using `pymatreader` and extracts `extracellular` data.
- It creates a dummy recording extractor using `spikeinterface.core.numpyextractors.NumpyRecording`.
- The generated extractor includes channel IDs and other relevant metadata such as number of channels,
number of samples, and sampling frequency.
- Channel metadata is added to the dummy extractor using the `add_channel_metadata_to_recoder` function.
- If the `.session.mat` file is not found, no extractor is returned.

Warnings
--------
Ensure that the `.session.mat` file is located at the expected session path
(`self.session_path / f"{self.session_id}.session.mat"`), or the method will not generate a recording extractor.
"""

session_data_file_path = self.session_path / f"{self.session_id}.session.mat"
if session_data_file_path.is_file():
from pymatreader import read_mat
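For orientation, a heavily hedged sketch of the dummy-extractor pattern the docstring describes; the numbers and the property name are placeholders standing in for values read from the `.session.mat` file, not the interface's actual field names:

```python
import numpy as np
from spikeinterface.core.numpyextractors import NumpyRecording

# Placeholder values standing in for what the .session.mat file would provide.
num_channels = 64
sampling_frequency = 30_000.0
num_samples = 1_000

dummy_recording = NumpyRecording(
    traces_list=[np.zeros((num_samples, num_channels), dtype="int16")],
    sampling_frequency=sampling_frequency,
    channel_ids=[str(channel_index) for channel_index in range(num_channels)],
)
# Channel metadata from the session file would then be attached to the dummy
# extractor, e.g. as channel properties.
dummy_recording.set_property(key="brain_area", values=["CA1"] * num_channels)
```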
31 changes: 11 additions & 20 deletions src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxconverter.py
@@ -29,8 +29,10 @@ def get_source_schema(cls):

@classmethod
def get_streams(cls, folder_path: DirectoryPath) -> list[str]:
"Return the stream ids available in the folder."
from spikeinterface.extractors import SpikeGLXRecordingExtractor

# The first entry is the stream ids; the second is the stream names
return SpikeGLXRecordingExtractor.get_streams(folder_path=folder_path)[0]

@validate_call
@@ -61,28 +63,17 @@ def __init__(
"""
folder_path = Path(folder_path)

streams = streams or self.get_streams(folder_path=folder_path)
streams_ids = streams or self.get_streams(folder_path=folder_path)

data_interfaces = dict()
for stream in streams:
if "ap" in stream:
probe_name = stream[:5]
file_path = (
folder_path / f"{folder_path.stem}_{probe_name}" / f"{folder_path.stem}_t0.{probe_name}.ap.bin"
)
es_key = f"ElectricalSeriesAP{probe_name.capitalize()}"
interface = SpikeGLXRecordingInterface(file_path=file_path, es_key=es_key)
elif "lf" in stream:
probe_name = stream[:5]
file_path = (
folder_path / f"{folder_path.stem}_{probe_name}" / f"{folder_path.stem}_t0.{probe_name}.lf.bin"
)
es_key = f"ElectricalSeriesLF{probe_name.capitalize()}"
interface = SpikeGLXRecordingInterface(file_path=file_path, es_key=es_key)
elif "nidq" in stream:
file_path = folder_path / f"{folder_path.stem}_t0.nidq.bin"
interface = SpikeGLXNIDQInterface(file_path=file_path)
data_interfaces.update({str(stream): interface}) # Without str() casting, is a numpy string

nidq_streams = [stream_id for stream_id in streams_ids if stream_id == "nidq"]
electrical_streams = [stream_id for stream_id in streams_ids if stream_id not in nidq_streams]
for stream_id in electrical_streams:
data_interfaces[stream_id] = SpikeGLXRecordingInterface(folder_path=folder_path, stream_id=stream_id)

for stream_id in nidq_streams:
data_interfaces[stream_id] = SpikeGLXNIDQInterface(folder_path=folder_path)

super().__init__(data_interfaces=data_interfaces, verbose=verbose)

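A hedged usage sketch of the refactored converter above; the folder path is a placeholder and the example stream ids follow SpikeInterface naming, so check `get_streams` for the actual values in a given folder:

```python
from neuroconv.converters import SpikeGLXConverterPipe

folder_path = "ECEPHY_DATA_PATH/spikeglx/Noise4Sam_g0"  # placeholder path

# List the stream ids present in the folder (classmethod shown in the diff above).
available_streams = SpikeGLXConverterPipe.get_streams(folder_path=folder_path)
print(available_streams)  # e.g. ['imec0.ap', 'imec0.lf', 'nidq']

# Restrict the conversion to one probe's AP band plus the NIDQ stream.
converter = SpikeGLXConverterPipe(folder_path=folder_path, streams=["imec0.ap", "nidq"])
```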

