From 3b798bcdd9e3274a6e3915702d43a4bff40e7510 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Mon, 8 Jul 2024 08:54:15 -0600 Subject: [PATCH 01/31] Add option to suppress tqdm progress bar in `VideoContext` (#937) --- CHANGELOG.md | 1 + .../datainterfaces/behavior/video/video_utils.py | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0fcfff01e..25a4f1f60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ ### Improvements * Make annotations from the raw format available on `IntanRecordingInterface`. [PR #934](https://github.com/catalystneuro/neuroconv/pull/943) +* Add an option to suppress the display of the progress bar (tqdm) in `VideoContext` [PR #937](https://github.com/catalystneuro/neuroconv/pull/937) ## v0.4.11 (June 14, 2024) diff --git a/src/neuroconv/datainterfaces/behavior/video/video_utils.py b/src/neuroconv/datainterfaces/behavior/video/video_utils.py index df70ee77b..5000c468b 100644 --- a/src/neuroconv/datainterfaces/behavior/video/video_utils.py +++ b/src/neuroconv/datainterfaces/behavior/video/video_utils.py @@ -9,7 +9,9 @@ from ....utils import FilePathType -def get_video_timestamps(file_path: FilePathType, max_frames: Optional[int] = None) -> list: +def get_video_timestamps( + file_path: FilePathType, max_frames: Optional[int] = None, display_progress: bool = True +) -> list: """Extract the timestamps of the video located in file_path Parameters ---------- @@ -26,7 +28,7 @@ def get_video_timestamps(file_path: FilePathType, max_frames: Optional[int] = No """ with VideoCaptureContext(str(file_path)) as video_context: - timestamps = video_context.get_video_timestamps(max_frames=max_frames) + timestamps = video_context.get_video_timestamps(max_frames=max_frames, display_progress=display_progress) return timestamps @@ -43,14 +45,20 @@ def __init__(self, file_path: FilePathType): self._frame_count = None self._video_open_msg = "The video file is not open!" 
- def get_video_timestamps(self, max_frames=None): + def get_video_timestamps(self, max_frames: Optional[int] = None, display_progress: bool = True): """Return numpy array of the timestamps(s) for a video file.""" cv2 = get_package(package_name="cv2", installation_instructions="pip install opencv-python-headless") timestamps = [] total_frames = self.get_video_frame_count() frames_to_extract = min(total_frames, max_frames) if max_frames else total_frames - for _ in tqdm(range(frames_to_extract), desc="retrieving timestamps"): + + iterator = ( + tqdm(range(frames_to_extract), desc="retrieving timestamps") + if display_progress + else range(frames_to_extract) + ) + for _ in iterator: success, _ = self.vc.read() if not success: break From 54f12f79c123bd91b4b88f1b66d87b412cb158bf Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Mon, 8 Jul 2024 09:27:45 -0600 Subject: [PATCH 02/31] Remove wrong assumptions about electrode metadata in Intan (#933) Co-authored-by: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> --- CHANGELOG.md | 1 + .../ecephys/intan/intandatainterface.py | 71 +------------------ 2 files changed, 2 insertions(+), 70 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25a4f1f60..fe0f90ed6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ * Fixed the conversion option schema of a `SpikeGLXConverter` when used inside another `NWBConverter`. [PR #922](https://github.com/catalystneuro/neuroconv/pull/922) * Fixed a case of the `NeuroScopeSortingExtractor` when the optional `xml_file_path` is not specified. [PR #926](https://github.com/catalystneuro/neuroconv/pull/926) * Fixed `Can't specify experiment type when converting .abf to .nwb with Neuroconv`. 
[PR #609](https://github.com/catalystneuro/neuroconv/pull/609) +* Remove assumption that the ports of the Intan acquisition system correspond to electrode groupings in `IntanRecordingInterface` [PR #933](https://github.com/catalystneuro/neuroconv/pull/933) ### Improvements * Make annotations from the raw format available on `IntanRecordingInterface`. [PR #934](https://github.com/catalystneuro/neuroconv/pull/943) diff --git a/src/neuroconv/datainterfaces/ecephys/intan/intandatainterface.py b/src/neuroconv/datainterfaces/ecephys/intan/intandatainterface.py index fae795cc4..2952a3dee 100644 --- a/src/neuroconv/datainterfaces/ecephys/intan/intandatainterface.py +++ b/src/neuroconv/datainterfaces/ecephys/intan/intandatainterface.py @@ -9,32 +9,6 @@ from ....utils import FilePathType, get_schema_from_hdmf_class -def extract_electrode_metadata(recording_extractor) -> dict: - - neo_version = get_package_version(name="neo") - - # The native native_channel_name in Intan have the following form: A-000, A-001, A-002, B-000, B-001, B-002, etc. 
- if neo_version > Version("0.13.0"): # TODO: Remove after the release of neo 0.14.0 - native_channel_names = recording_extractor.get_channel_ids() - else: - # Previous to version 0.13.1 the native_channel_name was stored as channel_name - native_channel_names = recording_extractor.get_property("channel_name") - - group_names = [channel.split("-")[0] for channel in native_channel_names] - unique_group_names = set(group_names) - group_electrode_numbers = [int(channel.split("-")[1]) for channel in native_channel_names] - custom_names = list() - - electrodes_metadata = dict( - group_names=group_names, - unique_group_names=unique_group_names, - group_electrode_numbers=group_electrode_numbers, - custom_names=custom_names, - ) - - return electrodes_metadata - - class IntanRecordingInterface(BaseRecordingExtractorInterface): """ Primary data interface class for converting Intan data using the @@ -85,7 +59,7 @@ def __init__( ) self.stream_id = stream_id else: - self.stream_id = "0" + self.stream_id = "0" # These are the amplifier channels or to the stream_name 'RHD2000 amplifier channel' init_kwargs = dict( file_path=file_path, @@ -108,22 +82,6 @@ def __init__( init_kwargs["ignore_integrity_checks"] = ignore_integrity_checks super().__init__(**init_kwargs) - electrodes_metadata = extract_electrode_metadata(recording_extractor=self.recording_extractor) - - group_names = electrodes_metadata["group_names"] - group_electrode_numbers = electrodes_metadata["group_electrode_numbers"] - unique_group_names = electrodes_metadata["unique_group_names"] - custom_names = electrodes_metadata["custom_names"] - - channel_ids = self.recording_extractor.get_channel_ids() - self.recording_extractor.set_property(key="group_name", ids=channel_ids, values=group_names) - if len(unique_group_names) > 1: - self.recording_extractor.set_property( - key="group_electrode_number", ids=channel_ids, values=group_electrode_numbers - ) - - if any(custom_names): - 
self.recording_extractor.set_property(key="custom_channel_name", ids=channel_ids, values=custom_names) def get_metadata_schema(self) -> dict: metadata_schema = super().get_metadata_schema() @@ -145,36 +103,9 @@ def get_metadata(self) -> dict: device_list = [device] ecephys_metadata.update(Device=device_list) - # Add electrode group - unique_group_name = set(self.recording_extractor.get_property("group_name")) - electrode_group_list = [ - dict( - name=group_name, - description=f"Group {group_name} electrodes.", - device="Intan", - location="", - ) - for group_name in unique_group_name - ] - ecephys_metadata.update(ElectrodeGroup=electrode_group_list) - # Add electrodes and electrode groups ecephys_metadata.update( - Electrodes=[ - dict(name="group_name", description="The name of the ElectrodeGroup this electrode is a part of.") - ], ElectricalSeriesRaw=dict(name="ElectricalSeriesRaw", description="Raw acquisition traces."), ) - # Add group electrode number if available - recording_extractor_properties = self.recording_extractor.get_property_keys() - if "group_electrode_number" in recording_extractor_properties: - ecephys_metadata["Electrodes"].append( - dict(name="group_electrode_number", description="0-indexed channel within a group.") - ) - if "custom_channel_name" in recording_extractor_properties: - ecephys_metadata["Electrodes"].append( - dict(name="custom_channel_name", description="Custom channel name assigned in Intan.") - ) - return metadata From dd8ef4fbd031f4abec852fdd19e6c6abb625e8c5 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 8 Jul 2024 12:03:31 -0400 Subject: [PATCH 03/31] [Cloud Deployment IIa] Rclone docker image extension for config file (#902) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: CodyCBakerPhD --- ...upload_docker_image_rclone_with_config.yml | 36 ++++++++ ...sting.yml => neuroconv_docker_testing.yml} | 2 +- 
.github/workflows/rclone_docker_testing.yml | 39 ++++++++ CHANGELOG.md | 3 + dockerfiles/rclone_with_config | 5 ++ docs/developer_guide/docker_images.rst | 6 +- docs/user_guide/docker_demo.rst | 33 ++++++- tests/docker_rclone_with_config_cli.py | 89 +++++++++++++++++++ ...ocker_yaml_conversion_specification_cli.py | 1 - 9 files changed, 209 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/build_and_upload_docker_image_rclone_with_config.yml rename .github/workflows/{docker_testing.yml => neuroconv_docker_testing.yml} (99%) create mode 100644 .github/workflows/rclone_docker_testing.yml create mode 100644 dockerfiles/rclone_with_config create mode 100644 tests/docker_rclone_with_config_cli.py diff --git a/.github/workflows/build_and_upload_docker_image_rclone_with_config.yml b/.github/workflows/build_and_upload_docker_image_rclone_with_config.yml new file mode 100644 index 000000000..7ff197bdc --- /dev/null +++ b/.github/workflows/build_and_upload_docker_image_rclone_with_config.yml @@ -0,0 +1,36 @@ +name: Build and Upload Docker Image of Rclone With Config to GHCR + +on: + schedule: + - cron: "0 16 * * 1" # Weekly at noon EST on Monday + workflow_dispatch: + +concurrency: # Cancel previous workflows on the same pull request + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + release-image: + name: Build and Upload Docker Image of Rclone With Config to GHCR + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ secrets.DOCKER_UPLOADER_USERNAME }} + password: ${{ secrets.DOCKER_UPLOADER_PASSWORD }} + - name: Build and push + uses: docker/build-push-action@v5 + with: + push: true # Push is a shorthand for --output=type=registry + tags: 
ghcr.io/catalystneuro/rclone_with_config:latest + context: . + file: dockerfiles/rclone_with_config + provenance: false diff --git a/.github/workflows/docker_testing.yml b/.github/workflows/neuroconv_docker_testing.yml similarity index 99% rename from .github/workflows/docker_testing.yml rename to .github/workflows/neuroconv_docker_testing.yml index 6916e0e4e..282da7937 100644 --- a/.github/workflows/docker_testing.yml +++ b/.github/workflows/neuroconv_docker_testing.yml @@ -1,4 +1,4 @@ -name: Docker CLI tests +name: NeuroConv Docker CLI tests on: schedule: - cron: "0 16 * * *" # Daily at noon EST diff --git a/.github/workflows/rclone_docker_testing.yml b/.github/workflows/rclone_docker_testing.yml new file mode 100644 index 000000000..2e8ea9e17 --- /dev/null +++ b/.github/workflows/rclone_docker_testing.yml @@ -0,0 +1,39 @@ +name: Rclone Docker Tests +on: + schedule: + - cron: "0 16 * * *" # Daily at noon EST + workflow_dispatch: + +jobs: + run: + name: ${{ matrix.os }} Python ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.12"] + os: [ubuntu-latest] + steps: + - uses: actions/checkout@v4 + - run: git fetch --prune --unshallow --tags + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Global Setup + run: python -m pip install -U pip # Official recommended way + + - name: Install pytest and neuroconv minimal + run: | + pip install pytest + pip install . 
+ + - name: Pull docker image + run: docker pull ghcr.io/catalystneuro/rclone_with_config:latest + - name: Run docker tests + run: pytest tests/docker_rclone_with_config_cli.py -vv -rsx + env: + RCLONE_DRIVE_ACCESS_TOKEN: ${{ secrets.RCLONE_DRIVE_ACCESS_TOKEN }} + RCLONE_DRIVE_REFRESH_TOKEN: ${{ secrets.RCLONE_DRIVE_REFRESH_TOKEN }} + RCLONE_EXPIRY_TOKEN: ${{ secrets.RCLONE_EXPIRY_TOKEN }} diff --git a/CHANGELOG.md b/CHANGELOG.md index fe0f90ed6..3e988ac17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Upcoming +### Features +* Added docker image and tests for an automated Rclone configuration (with file stream passed via an environment variable). [PR #902](https://github.com/catalystneuro/neuroconv/pull/902) + ### Bug fixes * Fixed the conversion option schema of a `SpikeGLXConverter` when used inside another `NWBConverter`. [PR #922](https://github.com/catalystneuro/neuroconv/pull/922) * Fixed a case of the `NeuroScopeSortingExtractor` when the optional `xml_file_path` is not specified. [PR #926](https://github.com/catalystneuro/neuroconv/pull/926) diff --git a/dockerfiles/rclone_with_config b/dockerfiles/rclone_with_config new file mode 100644 index 000000000..985ce8019 --- /dev/null +++ b/dockerfiles/rclone_with_config @@ -0,0 +1,5 @@ +FROM rclone/rclone:latest +LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv +LABEL org.opencontainers.image.description="A simple extension of the basic Rclone docker image to automatically create a local .conf file from contents passed via an environment variable." +CMD printf "$RCLONE_CONFIG" > ./rclone.conf && eval "$RCLONE_COMMAND" +ENTRYPOINT [""] diff --git a/docs/developer_guide/docker_images.rst b/docs/developer_guide/docker_images.rst index d78b3cfd0..0310734a3 100644 --- a/docs/developer_guide/docker_images.rst +++ b/docs/developer_guide/docker_images.rst @@ -54,6 +54,8 @@ After building the image itself, we can publish the container with... 
Though it may appear confusing, the use of the ``IMAGE_NAME`` in these steps determines only the _name_ of the package as available from the 'packages' screen of the host repository; the ``LABEL`` itself ensured the upload and linkage to the NeuroConv GHCR. +All our docker images can be built in GitHub Actions (for Ubuntu) and pushed automatically to the GHCR by manually triggering their respective workflow. Keep in mind that most of them are on semi-regular CRON schedules, though. + Run Docker container on local YAML conversion specification file @@ -73,12 +75,14 @@ and can then run the entrypoint (equivalent to the usual command line usage) on +.. _developer_docker_details: + Run Docker container on YAML conversion specification environment variable -------------------------------------------------------------------------- An alternative approach that simplifies usage on systems such as AWS Batch is to specify the YAML contents as an environment variable. The YAML file is constructed in the first step of the container launch. -The only potential downside with this usage is the maximum size of an environment variable (~13,000 characters). Typical YAML specification files should not come remotely close to this limit. +The only potential downside with this usage is the maximum size of an environment variable (~13,000 characters). Typical YAML specification files should not come remotely close to this limit. This is in contrast to the limits on the CMD line of any docker container, which is 8192 characters for Windows or either 64 or 128 KiB depending on the UNIX build. Otherwise, in any cloud deployment, the YAML file transfer will have to be managed separately, likely as a part of the data transfer or an entirely separate step. 
diff --git a/docs/user_guide/docker_demo.rst b/docs/user_guide/docker_demo.rst index 92a1d5b2f..e089b5748 100644 --- a/docs/user_guide/docker_demo.rst +++ b/docs/user_guide/docker_demo.rst @@ -1,5 +1,5 @@ -Docker Demo ------------ +NeuroConv Docker Demo +--------------------- The following is an explicit demonstration of how to use the Docker-based NeuroConv YAML specification via the command line. @@ -116,3 +116,32 @@ Voilà! If everything occurred successfully, you should see... Metadata is valid! conversion_options is valid! NWB file saved at /demo_neuroconv_docker/demo_output/phy_from_docker_yaml.nwb! + + + + +RClone With Config Docker Demo +------------------------------ + +NeuroConv also supports a convenient Docker image for running data transfers via `Rclone <https://rclone.org/>`_. + +To use this image, you must first configure the remote locally by calling: + +.. code:: + + rclone config + +And follow all interactive instructions (defaults are usually sufficient). + +The Docker image requires two environment variables to be set (see :ref:`developer_docker_details` for more details on a related process). + +- ``RCLONE_CONFIG``: The full file content of the rclone.conf file on your system. You can find this by calling ``rclone config file``. On UNIX, for example, you can set this variable using ``RCLONE_CONFIG=$(