From 80325265ba51ed0cb802c03c32a476cb7f69f284 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Sat, 28 Sep 2024 17:44:54 -0400 Subject: [PATCH 01/28] adjust docker tag on main to rebuild --- .../workflows/build_and_upload_docker_image_yaml_variable.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_upload_docker_image_yaml_variable.yml b/.github/workflows/build_and_upload_docker_image_yaml_variable.yml index 3c2a06d93..d5c6da058 100644 --- a/.github/workflows/build_and_upload_docker_image_yaml_variable.yml +++ b/.github/workflows/build_and_upload_docker_image_yaml_variable.yml @@ -31,7 +31,7 @@ jobs: uses: docker/build-push-action@v5 with: push: true # Push is a shorthand for --output=type=registry - tags: ghcr.io/catalystneuro/neuroconv:yaml_variable + tags: ghcr.io/catalystneuro/neuroconv_yaml_variable:latest context: . file: dockerfiles/neuroconv_latest_yaml_variable provenance: false From 6735e097a09326772a52f70a667f71770d9a95ea Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Sat, 28 Sep 2024 18:14:16 -0400 Subject: [PATCH 02/28] adjust dockerfile on main to rebuild --- dockerfiles/neuroconv_latest_yaml_variable | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockerfiles/neuroconv_latest_yaml_variable b/dockerfiles/neuroconv_latest_yaml_variable index ea411ee44..5500f14f0 100644 --- a/dockerfiles/neuroconv_latest_yaml_variable +++ b/dockerfiles/neuroconv_latest_yaml_variable @@ -1,4 +1,4 @@ FROM ghcr.io/catalystneuro/neuroconv:latest LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv LABEL org.opencontainers.image.description="A docker image for the most recent official release of the NeuroConv package. Modified to take in environment variables for the YAML conversion specification and other command line arguments." -CMD echo "$NEUROCONV_YAML" > run.yml && python -m neuroconv run.yml --data-folder-path "$NEUROCONV_DATA_PATH" --output-folder-path "$NEUROCONV_OUTPUT_PATH" --overwrite +CMD printf "$NEUROCONV_YAML" > ./run.yml && neuroconv run.yml --data-folder-path "$NEUROCONV_DATA_PATH" --output-folder-path "$NEUROCONV_OUTPUT_PATH" --overwrite From 0cc66de36d91b43f718507c97447ca56382dcb74 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Sat, 28 Sep 2024 20:57:45 -0400 Subject: [PATCH 03/28] hotfix base image --- dockerfiles/neuroconv_latest_yaml_variable | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dockerfiles/neuroconv_latest_yaml_variable b/dockerfiles/neuroconv_latest_yaml_variable index 5500f14f0..04d2f0847 100644 --- a/dockerfiles/neuroconv_latest_yaml_variable +++ b/dockerfiles/neuroconv_latest_yaml_variable @@ -1,4 +1,5 @@ -FROM ghcr.io/catalystneuro/neuroconv:latest +# TODO: make this neuroconv:latest once optional installations are working again +FROM ghcr.io/catalystneuro/neuroconv:dev LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv LABEL org.opencontainers.image.description="A docker image for the most recent official release of the NeuroConv package. Modified to take in environment variables for the YAML conversion specification and other command line arguments." 
CMD printf "$NEUROCONV_YAML" > ./run.yml && neuroconv run.yml --data-folder-path "$NEUROCONV_DATA_PATH" --output-folder-path "$NEUROCONV_OUTPUT_PATH" --overwrite From ccc4a1e7bee62a5bd07d9da5c17072f26ab2bea8 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Tue, 8 Oct 2024 09:09:07 -0400 Subject: [PATCH 04/28] Slimmer code blocks in Docker usage docs (#1102) --- docs/user_guide/docker_demo.rst | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/user_guide/docker_demo.rst b/docs/user_guide/docker_demo.rst index a943cfa58..ddf255070 100644 --- a/docs/user_guide/docker_demo.rst +++ b/docs/user_guide/docker_demo.rst @@ -102,7 +102,11 @@ It relies on some of the GIN data from the main testing suite, see :ref:`example .. code:: - docker run -t --volume /home/user/demo_neuroconv_docker:/demo_neuroconv_docker ghcr.io/catalystneuro/neuroconv:latest neuroconv /demo_neuroconv_docker/demo_neuroconv_docker_yaml.yml --output-folder-path /demo_neuroconv_docker/demo_output + docker run -t \ + --volume /home/user/demo_neuroconv_docker:/demo_neuroconv_docker \ + ghcr.io/catalystneuro/neuroconv:latest \ + neuroconv /demo_neuroconv_docker/demo_neuroconv_docker_yaml.yml \ + --output-folder-path /demo_neuroconv_docker/demo_output Voilà! If everything occurred successfully, you should see... @@ -142,6 +146,10 @@ Then, you can use the following command to run the Rclone Docker image: .. code:: - docker run -t --volume destination_folder:destination_folder -e RCLONE_CONFIG="$RCLONE_CONFIG" -e RCLONE_COMMAND="$RCLONE_COMMAND" ghcr.io/catalystneuro/rclone_with_config:latest + docker run -t \ + --volume destination_folder:destination_folder \ + -e RCLONE_CONFIG="$RCLONE_CONFIG" \ + -e RCLONE_COMMAND="$RCLONE_COMMAND" \ + ghcr.io/catalystneuro/rclone_with_config:latest This image is particularly designed for convenience with AWS Batch (EC2) tools that rely heavily on atomic Docker operations. Alternative AWS approaches would have relied on transferring the Rclone configuration file to the EC2 instances using separate transfer protocols or dependent steps, both of which add complexity to the workflow. 
From bfbbe4bff8245760622e5b873cd846d346c6ae06 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 9 Oct 2024 12:17:53 -0400 Subject: [PATCH 05/28] [pre-commit.ci] pre-commit autoupdate (#1098) --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 810976eb5..b2c1d900c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,19 +1,19 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: check-yaml - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/psf/black - rev: 24.8.0 + rev: 24.10.0 hooks: - id: black exclude: ^docs/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.5 + rev: v0.6.9 hooks: - id: ruff args: [ --fix ] From 52cd6aa0ea46c0d143739767da4951ade17c3b74 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Fri, 11 Oct 2024 08:54:53 -0600 Subject: [PATCH 06/28] Add skip channels to EDF interface (#1110) --- CHANGELOG.md | 1 + .../ecephys/edf/edfdatainterface.py | 32 +++++++++++++++++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c7bc13be..d748d79e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ ## Features * Using in-house `GenericDataChunkIterator` [PR #1068](https://github.com/catalystneuro/neuroconv/pull/1068) * Data interfaces now perform source (argument inputs) validation with the json schema [PR #1020](https://github.com/catalystneuro/neuroconv/pull/1020) +* Added `channels_to_skip` to `EDFRecordingInterface` so the user can skip non-neural channels [PR #1110](https://github.com/catalystneuro/neuroconv/pull/1110) ## Improvements * Remove dev test from PR [PR #1092](https://github.com/catalystneuro/neuroconv/pull/1092) diff --git a/src/neuroconv/datainterfaces/ecephys/edf/edfdatainterface.py b/src/neuroconv/datainterfaces/ecephys/edf/edfdatainterface.py index 119e9f8d2..ef169f66f 100644 --- a/src/neuroconv/datainterfaces/ecephys/edf/edfdatainterface.py +++ b/src/neuroconv/datainterfaces/ecephys/edf/edfdatainterface.py @@ -1,3 +1,5 @@ +from typing import Optional + from pydantic import FilePath from ..baserecordingextractorinterface import BaseRecordingExtractorInterface @@ -23,7 +25,22 @@ def get_source_schema(cls) -> dict: source_schema["properties"]["file_path"]["description"] = "Path to the .edf file." return source_schema - def __init__(self, file_path: FilePath, verbose: bool = True, es_key: str = "ElectricalSeries"): + def _source_data_to_extractor_kwargs(self, source_data: dict) -> dict: + + extractor_kwargs = source_data.copy() + extractor_kwargs.pop("channels_to_skip") + extractor_kwargs["all_annotations"] = True + extractor_kwargs["use_names_as_ids"] = True + + return extractor_kwargs + + def __init__( + self, + file_path: FilePath, + verbose: bool = True, + es_key: str = "ElectricalSeries", + channels_to_skip: Optional[list] = None, + ): """ Load and prepare data for EDF. Currently, only continuous EDF+ files (EDF+C) and original EDF files (EDF) are supported @@ -36,15 +53,24 @@ def __init__(self, file_path: FilePath, verbose: bool = True, es_key: str = "Ele verbose : bool, default: True Allows verbose. es_key : str, default: "ElectricalSeries" + Key for the ElectricalSeries metadata + channels_to_skip : list, default: None + Channels to skip when adding the data to the nwbfile. 
These parameter can be used to skip non-neural + channels that are present in the EDF file. + """ get_package( package_name="pyedflib", - excluded_platforms_and_python_versions=dict(darwin=dict(arm=["3.8", "3.9"])), + excluded_platforms_and_python_versions=dict(darwin=dict(arm=["3.9"])), ) - super().__init__(file_path=file_path, verbose=verbose, es_key=es_key) + super().__init__(file_path=file_path, verbose=verbose, es_key=es_key, channels_to_skip=channels_to_skip) self.edf_header = self.recording_extractor.neo_reader.edf_header + # We remove the channels that are not neural + if channels_to_skip: + self.recording_extractor = self.recording_extractor.remove_channels(remove_channel_ids=channels_to_skip) + def extract_nwb_file_metadata(self) -> dict: nwbfile_metadata = dict( session_start_time=self.edf_header["startdate"], From 165cb31068a3dac62eee65921b4ab09ba1111d85 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 22 Oct 2024 08:12:01 -0600 Subject: [PATCH 07/28] Add more friendly error when writing recording with multiple offsets (#1111) --- CHANGELOG.md | 1 + .../tools/spikeinterface/spikeinterface.py | 33 ++++++++++++++++--- .../test_ecephys/test_tools_spikeinterface.py | 33 +++++++++++++++++++ 3 files changed, 63 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d748d79e6..931383066 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ ## Features * Using in-house `GenericDataChunkIterator` [PR #1068](https://github.com/catalystneuro/neuroconv/pull/1068) * Data interfaces now perform source (argument inputs) validation with the json schema [PR #1020](https://github.com/catalystneuro/neuroconv/pull/1020) +* Improve the error message when writing a recording extractor with multiple offsets [PR #1111](https://github.com/catalystneuro/neuroconv/pull/1111) * Added `channels_to_skip` to `EDFRecordingInterface` so the user can skip non-neural channels [PR #1110](https://github.com/catalystneuro/neuroconv/pull/1110) ## Improvements diff --git a/src/neuroconv/tools/spikeinterface/spikeinterface.py b/src/neuroconv/tools/spikeinterface/spikeinterface.py index 9128078a6..1be86862a 100644 --- a/src/neuroconv/tools/spikeinterface/spikeinterface.py +++ b/src/neuroconv/tools/spikeinterface/spikeinterface.py @@ -99,7 +99,8 @@ def add_devices_to_nwbfile(nwbfile: pynwb.NWBFile, metadata: Optional[DeepDict] metadata["Ecephys"]["Device"] = [defaults] for device_metadata in metadata["Ecephys"]["Device"]: if device_metadata.get("name", defaults["name"]) not in nwbfile.devices: - nwbfile.create_device(**dict(defaults, **device_metadata)) + device_kwargs = dict(defaults, **device_metadata) + nwbfile.create_device(**device_kwargs) def add_electrode_groups(recording: BaseRecording, nwbfile: pynwb.NWBFile, metadata: dict = None): @@ -778,6 +779,28 @@ def add_electrical_series( ) +def _report_variable_offset(channel_offsets, channel_ids): + """ + Helper function to report variable offsets per channel IDs. + Groups the different available offsets per channel IDs and raises a ValueError. 
+ """ + # Group the different offsets per channel IDs + offset_to_channel_ids = {} + for offset, channel_id in zip(channel_offsets, channel_ids): + if offset not in offset_to_channel_ids: + offset_to_channel_ids[offset] = [] + offset_to_channel_ids[offset].append(channel_id) + + # Create a user-friendly message + message_lines = ["Recording extractors with heterogeneous offsets are not supported."] + message_lines.append("Multiple offsets were found per channel IDs:") + for offset, ids in offset_to_channel_ids.items(): + message_lines.append(f" Offset {offset}: Channel IDs {ids}") + message = "\n".join(message_lines) + + raise ValueError(message) + + def add_electrical_series_to_nwbfile( recording: BaseRecording, nwbfile: pynwb.NWBFile, @@ -905,14 +928,16 @@ def add_electrical_series_to_nwbfile( # Spikeinterface guarantees data in micro volts when return_scaled=True. This multiplies by gain and adds offsets # In nwb to get traces in Volts we take data*channel_conversion*conversion + offset channel_conversion = recording.get_channel_gains() - channel_offset = recording.get_channel_offsets() + channel_offsets = recording.get_channel_offsets() unique_channel_conversion = np.unique(channel_conversion) unique_channel_conversion = unique_channel_conversion[0] if len(unique_channel_conversion) == 1 else None - unique_offset = np.unique(channel_offset) + unique_offset = np.unique(channel_offsets) if unique_offset.size > 1: - raise ValueError("Recording extractors with heterogeneous offsets are not supported") + channel_ids = recording.get_channel_ids() + # This prints a user friendly error where the user is provided with a map from offset to channels + _report_variable_offset(channel_offsets, channel_ids) unique_offset = unique_offset[0] if unique_offset[0] is not None else 0 micro_to_volts_conversion_factor = 1e-6 diff --git a/tests/test_ecephys/test_tools_spikeinterface.py b/tests/test_ecephys/test_tools_spikeinterface.py index 11c29b31f..3436a2e70 100644 --- a/tests/test_ecephys/test_tools_spikeinterface.py +++ b/tests/test_ecephys/test_tools_spikeinterface.py @@ -1,3 +1,4 @@ +import re import unittest from datetime import datetime from pathlib import Path @@ -8,9 +9,11 @@ import numpy as np import psutil import pynwb.ecephys +import pytest from hdmf.data_utils import DataChunkIterator from hdmf.testing import TestCase from pynwb import NWBFile +from pynwb.testing.mock.file import mock_NWBFile from spikeinterface.core.generate import ( generate_ground_truth_recording, generate_recording, @@ -394,6 +397,36 @@ def test_variable_offsets_assertion(self): ) +def test_error_with_multiple_offset(): + # Generate a mock recording with 5 channels and 1 second duration + recording = generate_recording(num_channels=5, durations=[1.0]) + # Rename channels to specific identifiers for clarity in error messages + recording = recording.rename_channels(new_channel_ids=["a", "b", "c", "d", "e"]) + # Set different offsets for the channels + recording.set_channel_offsets(offsets=[0, 0, 1, 1, 2]) + + # Create a mock NWBFile object + nwbfile = mock_NWBFile() + + # Expected error message + expected_message_lines = [ + "Recording extractors with heterogeneous offsets are not supported.", + "Multiple offsets were found per channel IDs:", + " Offset 0: Channel IDs ['a', 'b']", + " Offset 1: Channel IDs ['c', 'd']", + " Offset 2: Channel IDs ['e']", + ] + expected_message = "\n".join(expected_message_lines) + + # Use re.escape to escape any special regex characters in the expected message + expected_message_regex = 
re.escape(expected_message) + + # Attempt to add electrical series to the NWB file + # Expecting a ValueError due to multiple offsets, matching the expected message + with pytest.raises(ValueError, match=expected_message_regex): + add_electrical_series_to_nwbfile(recording=recording, nwbfile=nwbfile) + + class TestAddElectricalSeriesChunking(unittest.TestCase): @classmethod def setUpClass(cls): From 60c5e2a9a1736c237ce46f1545abbe0a318070e6 Mon Sep 17 00:00:00 2001 From: Paul Adkisson Date: Thu, 24 Oct 2024 11:25:13 +1100 Subject: [PATCH 08/28] Fix Failing Dailies (#1113) --- .github/workflows/all_os_versions.txt | 1 + .github/workflows/all_python_versions.txt | 1 + .github/workflows/dailies.yml | 23 +++++++++++++++++ .github/workflows/deploy-tests.yml | 29 +++++++++++++++------- .github/workflows/dev-testing.yml | 1 - .github/workflows/doctests.yml | 2 -- .github/workflows/live-service-testing.yml | 2 -- .github/workflows/testing.yml | 3 --- CHANGELOG.md | 1 + 9 files changed, 46 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/all_os_versions.txt create mode 100644 .github/workflows/all_python_versions.txt diff --git a/.github/workflows/all_os_versions.txt b/.github/workflows/all_os_versions.txt new file mode 100644 index 000000000..93d2aa0a7 --- /dev/null +++ b/.github/workflows/all_os_versions.txt @@ -0,0 +1 @@ +["ubuntu-latest", "macos-latest", "windows-latest", "macos-13"] diff --git a/.github/workflows/all_python_versions.txt b/.github/workflows/all_python_versions.txt new file mode 100644 index 000000000..7d1db34cd --- /dev/null +++ b/.github/workflows/all_python_versions.txt @@ -0,0 +1 @@ +["3.9", "3.10", "3.11", "3.12"] diff --git a/.github/workflows/dailies.yml b/.github/workflows/dailies.yml index 459246091..136590652 100644 --- a/.github/workflows/dailies.yml +++ b/.github/workflows/dailies.yml @@ -6,6 +6,18 @@ on: - cron: "0 4 * * *" # Daily at 8PM PST, 11PM EST, 5AM CET to avoid working hours jobs: + load_python_and_os_versions: + runs-on: ubuntu-latest + outputs: + ALL_PYTHON_VERSIONS: ${{ steps.load_python_versions.outputs.python_versions }} + ALL_OS_VERSIONS: ${{ steps.load_os_versions.outputs.os_versions }} + steps: + - uses: actions/checkout@v4 + - id: load_python_versions + run: echo "python_versions=$(cat ./.github/workflows/all_python_versions.txt)" >> "$GITHUB_OUTPUT" + - id: load_os_versions + run: echo "os_versions=$(cat ./.github/workflows/all_os_versions.txt)" >> "$GITHUB_OUTPUT" + build-and-upload-docker-image-dev: uses: ./.github/workflows/build_and_upload_docker_image_dev.yml secrets: @@ -13,25 +25,36 @@ jobs: DOCKER_UPLOADER_PASSWORD: ${{ secrets.DOCKER_UPLOADER_PASSWORD }} run-daily-tests: + needs: load_python_and_os_versions uses: ./.github/workflows/testing.yml secrets: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} S3_GIN_BUCKET: ${{ secrets.S3_GIN_BUCKET }} CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + with: + python-versions: ${{ needs.load_python_and_os_versions.outputs.ALL_PYTHON_VERSIONS }} + os-versions: ${{ needs.load_python_and_os_versions.outputs.ALL_OS_VERSIONS }} run-daily-dev-tests: + needs: load_python_and_os_versions uses: ./.github/workflows/dev-testing.yml secrets: DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} S3_GIN_BUCKET: ${{ secrets.S3_GIN_BUCKET }} + with: + python-versions: ${{ needs.load_python_and_os_versions.outputs.ALL_PYTHON_VERSIONS }} 
run-daily-live-service-testing: + needs: load_python_and_os_versions uses: ./.github/workflows/live-service-testing.yml secrets: DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} + with: + python-versions: ${{ needs.load_python_and_os_versions.outputs.ALL_PYTHON_VERSIONS }} + os-versions: ${{ needs.load_python_and_os_versions.outputs.ALL_OS_VERSIONS }} run-daily-neuroconv-docker-testing: uses: ./.github/workflows/neuroconv_docker_testing.yml diff --git a/.github/workflows/deploy-tests.yml b/.github/workflows/deploy-tests.yml index 4f67d15de..a18fe8310 100644 --- a/.github/workflows/deploy-tests.yml +++ b/.github/workflows/deploy-tests.yml @@ -13,6 +13,17 @@ concurrency: cancel-in-progress: true jobs: + load_python_and_os_versions: + runs-on: ubuntu-latest + outputs: + ALL_PYTHON_VERSIONS: ${{ steps.load_python_versions.outputs.python_versions }} + ALL_OS_VERSIONS: ${{ steps.load_os_versions.outputs.os_versions }} + steps: + - uses: actions/checkout@v4 + - id: load_python_versions + run: echo "python_versions=$(cat ./.github/workflows/all_python_versions.txt)" >> "$GITHUB_OUTPUT" + - id: load_os_versions + run: echo "os_versions=$(cat ./.github/workflows/all_os_versions.txt)" >> "$GITHUB_OUTPUT" assess-file-changes: uses: ./.github/workflows/assess-file-changes.yml @@ -31,7 +42,7 @@ jobs: 0 run-tests: - needs: assess-file-changes + needs: [assess-file-changes, load_python_and_os_versions] if: ${{ needs.assess-file-changes.outputs.SOURCE_CHANGED == 'true' }} uses: ./.github/workflows/testing.yml secrets: @@ -40,28 +51,28 @@ jobs: S3_GIN_BUCKET: ${{ secrets.S3_GIN_BUCKET }} CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: # Ternary operator: condition && value_if_true || value_if_false - python-versions: ${{ github.event.pull_request.draft == true && '["3.9"]' || '["3.9", "3.10", "3.11", "3.12"]' }} - os-versions: ${{ github.event.pull_request.draft == true && '["ubuntu-latest"]' || '["ubuntu-latest", "macos-latest", "macos-13", "windows-latest"]' }} + python-versions: ${{ github.event.pull_request.draft == true && '["3.9"]' || needs.load_python_and_os_versions.outputs.ALL_PYTHON_VERSIONS }} + os-versions: ${{ github.event.pull_request.draft == true && '["ubuntu-latest"]' || needs.load_python_and_os_versions.outputs.ALL_OS_VERSIONS }} # If the conversion gallery is the only thing that changed, run doctests only run-doctests-only: - needs: assess-file-changes + needs: [assess-file-changes, load_python_and_os_versions] if: ${{ needs.assess-file-changes.outputs.CONVERSION_GALLERY_CHANGED == 'true' && needs.assess-file-changes.outputs.SOURCE_CHANGED != 'true' }} uses: ./.github/workflows/doctests.yml with: # Ternary operator: condition && value_if_true || value_if_false - python-versions: ${{ github.event.pull_request.draft == true && '["3.9"]' || '["3.9", "3.10", "3.11", "3.12"]' }} - os-versions: ${{ github.event.pull_request.draft == true && '["ubuntu-latest"]' || '["ubuntu-latest", "macos-latest", "macos-13", "windows-latest"]' }} + python-versions: ${{ github.event.pull_request.draft == true && '["3.9"]' || needs.load_python_and_os_versions.outputs.ALL_PYTHON_VERSIONS }} + os-versions: ${{ github.event.pull_request.draft == true && '["ubuntu-latest"]' || needs.load_python_and_os_versions.outputs.ALL_OS_VERSIONS }} run-live-service-tests: - needs: assess-file-changes + needs: [assess-file-changes, load_python_and_os_versions] if: ${{ needs.assess-file-changes.outputs.SOURCE_CHANGED == 'true' }} uses: ./.github/workflows/live-service-testing.yml secrets: DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} 
with: # Ternary operator: condition && value_if_true || value_if_false - python-versions: ${{ github.event.pull_request.draft == true && '["3.9"]' || '["3.9", "3.10", "3.11", "3.12"]' }} - os-versions: ${{ github.event.pull_request.draft == true && '["ubuntu-latest"]' || '["ubuntu-latest", "macos-latest", "macos-13", "windows-latest"]' }} + python-versions: ${{ github.event.pull_request.draft == true && '["3.9"]' || needs.load_python_and_os_versions.outputs.ALL_PYTHON_VERSIONS }} + os-versions: ${{ github.event.pull_request.draft == true && '["ubuntu-latest"]' || needs.load_python_and_os_versions.outputs.ALL_OS_VERSIONS }} check-final-status: diff --git a/.github/workflows/dev-testing.yml b/.github/workflows/dev-testing.yml index 31b5329a8..65c011653 100644 --- a/.github/workflows/dev-testing.yml +++ b/.github/workflows/dev-testing.yml @@ -7,7 +7,6 @@ on: description: 'List of Python versions to use in matrix, as JSON string' required: true type: string - default: '["3.9", "3.10", "3.11", "3.12"]' secrets: DANDI_API_KEY: required: true diff --git a/.github/workflows/doctests.yml b/.github/workflows/doctests.yml index e492eda0c..5d96bf4a3 100644 --- a/.github/workflows/doctests.yml +++ b/.github/workflows/doctests.yml @@ -6,12 +6,10 @@ on: description: 'List of Python versions to use in matrix, as JSON string' required: true type: string - default: '["3.9", "3.10", "3.11", "3.12"]' os-versions: description: 'List of OS versions to use in matrix, as JSON string' required: true type: string - default: '["ubuntu-latest", "macos-latest", "windows-latest"]' jobs: diff --git a/.github/workflows/live-service-testing.yml b/.github/workflows/live-service-testing.yml index b9a425a8d..24eda7bc3 100644 --- a/.github/workflows/live-service-testing.yml +++ b/.github/workflows/live-service-testing.yml @@ -7,12 +7,10 @@ on: description: 'List of Python versions to use in matrix, as JSON string' required: true type: string - default: '["3.9", "3.10", "3.11", "3.12"]' os-versions: description: 'List of OS versions to use in matrix, as JSON string' required: true type: string - default: '["ubuntu-latest", "macos-latest", "windows-latest"]' secrets: DANDI_API_KEY: diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 06de82c4c..d8c5bb9fd 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -7,13 +7,10 @@ on: description: 'List of Python versions to use in matrix, as JSON string' required: true type: string - default: '["3.9", "3.10", "3.11", "3.12"]' os-versions: description: 'List of OS versions to use in matrix, as JSON string' required: true type: string - default: '["ubuntu-latest", "macos-latest", "windows-latest"]' - secrets: AWS_ACCESS_KEY_ID: diff --git a/CHANGELOG.md b/CHANGELOG.md index 931383066..268a773e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Deprecations ## Bug Fixes +* Fixed dailies [PR #1113](https://github.com/catalystneuro/neuroconv/pull/1113) ## Features * Using in-house `GenericDataChunkIterator` [PR #1068](https://github.com/catalystneuro/neuroconv/pull/1068) From 1897b0047d1945b99ed23476532ae5d9d5f162d0 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Fri, 1 Nov 2024 08:38:21 -0600 Subject: [PATCH 09/28] Fix installation extras (#1118) --- CHANGELOG.md | 1 + pyproject.toml | 259 +++++++++++++++++- setup.py | 56 ---- tests/test_on_data/icephys/__init__.py | 0 .../test_on_data/icephys/test_gin_icephys.py | 31 +-- tests/test_on_data/setup_paths.py | 15 +- 6 files changed, 272 insertions(+), 90 deletions(-) 
delete mode 100644 setup.py create mode 100644 tests/test_on_data/icephys/__init__.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 268a773e9..3fef5faff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Deprecations ## Bug Fixes +* Fixed formatwise installation from pipy [PR #1118](https://github.com/catalystneuro/neuroconv/pull/1118) * Fixed dailies [PR #1113](https://github.com/catalystneuro/neuroconv/pull/1113) ## Features diff --git a/pyproject.toml b/pyproject.toml index d7cf25813..2ee338c8c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,9 @@ dependencies = [ "Changelog" = "https://github.com/catalystneuro/neuroconv/blob/main/CHANGELOG.md" +[project.scripts] +neuroconv = "neuroconv.tools.yaml_conversion_specification._yaml_conversion_specification:run_conversion_from_yaml_cli" + [project.optional-dependencies] test = [ "pytest", @@ -83,17 +86,265 @@ docs = [ "spikeinterface>=0.101.0", # Needed for the API documentation "pydata_sphinx_theme==0.12.0" ] + dandi = ["dandi>=0.58.1"] compressors = ["hdf5plugin"] aws = ["boto3"] -[tool.setuptools.packages.find] -where = ["src"] +########################## +# Modality-specific Extras +########################## +## Text +csv = [ +] +excel = [ + "openpyxl", + "xlrd", +] +text = [ + "neuroconv[csv]", + "neuroconv[excel]", +] -[project.scripts] -neuroconv = "neuroconv.tools.yaml_conversion_specification._yaml_conversion_specification:run_conversion_from_yaml_cli" +## Behavior +audio = [ + "ndx-sound>=0.2.0", +] +sleap = [ + "av>=10.0.0", + "sleap-io>=0.0.2,<0.0.12; python_version<'3.9'", + "sleap-io>=0.0.2; python_version>='3.9'", +] +deeplabcut = [ + "ndx-pose==0.1.1", + "tables; platform_system != 'Darwin'", + "tables>=3.10.1; platform_system == 'Darwin' and python_version >= '3.10'", +] +fictrac = [ +] +video = [ + "opencv-python-headless>=4.8.1.78", +] +lightningpose = [ + "ndx-pose==0.1.1", + "neuroconv[video]", +] +medpc = [ + "ndx-events==0.2.0", +] +behavior = [ + "neuroconv[sleap]", + "neuroconv[audio]", + "neuroconv[deeplabcut]", + "neuroconv[fictrac]", + "neuroconv[video]", + "neuroconv[lightningpose]", + "neuroconv[medpc]", + "ndx-miniscope>=0.5.1", # This is for the miniscope behavior data interface, not sure is needed +] + + +## Ecephys +alphaomega = [ + "neo>=0.13.3", + "spikeinterface>=0.101.0", +] +axona = [ + "neo>=0.13.3", + "spikeinterface>=0.101.0", +] +biocam = [ + "neo>=0.13.3", + "spikeinterface>=0.101.0", +] +blackrock = [ + "neo>=0.13.3", + "spikeinterface>=0.101.0", +] +cellexplorer = [ + "hdf5storage>=0.1.18", + "neo>=0.13.3", + "pymatreader>=0.0.32", + "spikeinterface>=0.101.0", +] +edf = [ + "neo>=0.13.3", + "pyedflib>=0.1.36", + "spikeinterface>=0.101.0", +] +intan = [ + "neo>=0.13.3", + "spikeinterface>=0.101.0", +] +kilosort = [ + "neo>=0.13.3", + "spikeinterface>=0.101.0", +] + +maxwell = [ + "neo>=0.13.3", + "spikeinterface>=0.101.0", +] +mcsraw = [ + "neo>=0.13.3", + "spikeinterface>=0.101.0", +] +mearec = [ + "MEArec>=1.8.0", + "neo>=0.13.3", + "spikeinterface>=0.101.0", +] +neuralynx = [ + "natsort>=7.1.1", + "neo>=0.13.3", + "spikeinterface>=0.101.0", +] +neuroscope = [ + "lxml>=4.6.5", + "neo>=0.13.3", + "spikeinterface>=0.101.0", +] +openephys = [ + "lxml>=4.9.4", + "neo>=0.13.3", + "spikeinterface>=0.101.0", +] +phy = [ + "neo>=0.13.3", + "spikeinterface>=0.101.0", +] +plexon = [ + "neo>=0.13.3", + "spikeinterface>=0.101.0", + "zugbruecke >= 0.2.1; platform_system != 'Windows'", +] +spike2 = [ + "neo>=0.13.3", + "sonpy>=1.7.1; python_version=='3.9' and platform_system != 
'Darwin'", + "spikeinterface>=0.101.0", +] +spikegadgets = [ + "neo>=0.13.3", + "spikeinterface>=0.101.0", +] +spikeglx = [ + "neo>=0.13.3", + "spikeinterface>=0.101.0", +] +tdt = [ + "neo>=0.13.3", + "spikeinterface>=0.101.0", +] +ecephys = [ + "neuroconv[alphaomega]", + "neuroconv[axona]", + "neuroconv[biocam]", + "neuroconv[blackrock]", + "neuroconv[cellexplorer]", + "neuroconv[edf]", + "neuroconv[intan]", + "neuroconv[kilosort]", + "neuroconv[maxwell]", + "neuroconv[mcsraw]", + "neuroconv[mearec]", + "neuroconv[neuralynx]", + "neuroconv[neuroscope]", + "neuroconv[openephys]", + "neuroconv[phy]", + "neuroconv[plexon]", + "neuroconv[spike2]", + "neuroconv[spikegadgets]", + "neuroconv[spikeglx]", + "neuroconv[tdt]", +] + +## Icephys +abf = [ + "ndx-dandi-icephys>=0.4.0", + "neo>=0.13.2", +] +icephys = [ + "neuroconv[abf]", +] + +## Ophys +brukertiff = [ + "roiextractors>=0.5.7", + "tifffile>=2023.3.21", +] +caiman = [ + "roiextractors>=0.5.7", +] +cnmfe = [ + "roiextractors>=0.5.7", +] +extract = [ + "roiextractors>=0.5.7", +] +hdf5 = [ + "roiextractors>=0.5.7", +] +micromanagertiff = [ + "roiextractors>=0.5.7", + "tifffile>=2023.3.21", +] +miniscope = [ + "natsort>=8.3.1", + "ndx-miniscope>=0.5.1", + "roiextractors>=0.5.7", +] +sbx = [ + "roiextractors>=0.5.7", +] +scanimage = [ + "roiextractors>=0.5.7", + "scanimage-tiff-reader>=1.4.1", +] +sima = [ + "roiextractors>=0.5.7", +] +suite2p = [ + "roiextractors>=0.5.7", +] +tdt_fp = [ + "ndx-fiber-photometry", + "roiextractors>=0.5.7", + "tdt", +] +tiff = [ + "roiextractors>=0.5.7", + "tiffile>=2018.10.18", +] +ophys = [ + "neuroconv[brukertiff]", + "neuroconv[caiman]", + "neuroconv[cnmfe]", + "neuroconv[extract]", + "neuroconv[hdf5]", + "neuroconv[micromanagertiff]", + "neuroconv[miniscope]", + "neuroconv[sbx]", + "neuroconv[scanimage]", + "neuroconv[sima]", + "neuroconv[suite2p]", + "neuroconv[tdt_fp]", + "neuroconv[tiff]", +] +# Note these are references to the package in pipy (not local) +full = [ + "neuroconv[aws]", + "neuroconv[compressors]", + "neuroconv[dandi]", + "neuroconv[behavior]", + "neuroconv[ecephys]", + "neuroconv[icephys]", + "neuroconv[ophys]", + "neuroconv[text]", +] +[tool.setuptools.packages.find] +where = ["src"] [tool.pytest.ini_options] minversion = "6.0" diff --git a/setup.py b/setup.py deleted file mode 100644 index 53314e7e3..000000000 --- a/setup.py +++ /dev/null @@ -1,56 +0,0 @@ -import platform -import sys -from collections import defaultdict -from pathlib import Path -from shutil import copy - -from setuptools import setup - -root = Path(__file__).parent - - -def read_requirements(file): - """Read requirements from a file.""" - with open(root / file) as f: - return f.readlines() - - -extras_require = defaultdict(list) -extras_require["full"] = ["dandi>=0.58.1", "hdf5plugin", "boto3"] - -for modality in ["ophys", "ecephys", "icephys", "behavior", "text"]: - modality_path = root / "src" / "neuroconv" / "datainterfaces" / modality - modality_requirement_file = modality_path / "requirements.txt" - if modality_requirement_file.exists(): - modality_requirements = read_requirements(modality_requirement_file) - extras_require["full"].extend(modality_requirements) - extras_require[modality] = modality_requirements - else: - modality_requirements = [] - - format_subpaths = [path for path in modality_path.iterdir() if path.is_dir() and path.name != "__pycache__"] - for format_subpath in format_subpaths: - format_requirement_file = format_subpath / "requirements.txt" - extras_require[format_subpath.name] = 
modality_requirements.copy() - if format_requirement_file.exists(): - format_requirements = read_requirements(format_requirement_file) - extras_require["full"].extend(format_requirements) - extras_require[modality].extend(format_requirements) - extras_require[format_subpath.name].extend(format_requirements) - -# Create a local copy for the gin test configuration file based on the master file `base_gin_test_config.json` -gin_config_file_base = root / "base_gin_test_config.json" -gin_config_file_local = root / "tests/test_on_data/gin_test_config.json" -if not gin_config_file_local.exists(): - gin_config_file_local.parent.mkdir(parents=True, exist_ok=True) - copy(src=gin_config_file_base, dst=gin_config_file_local) - -# Bug related to sonpy on M1 Mac being installed but not running properly -if sys.platform == "darwin" and platform.processor() == "arm": - extras_require.pop("spike2", None) - extras_require["ecephys"] = [req for req in extras_require["ecephys"] if "sonpy" not in req] - extras_require["full"] = [req for req in extras_require["full"] if "sonpy" not in req] - -setup( - extras_require=extras_require, -) diff --git a/tests/test_on_data/icephys/__init__.py b/tests/test_on_data/icephys/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_on_data/icephys/test_gin_icephys.py b/tests/test_on_data/icephys/test_gin_icephys.py index 2d71e0636..2c8f4e22a 100644 --- a/tests/test_on_data/icephys/test_gin_icephys.py +++ b/tests/test_on_data/icephys/test_gin_icephys.py @@ -1,8 +1,5 @@ -import os -import tempfile import unittest from datetime import datetime -from pathlib import Path import numpy.testing as npt import pytest @@ -11,7 +8,8 @@ from neuroconv import NWBConverter from neuroconv.datainterfaces import AbfInterface from neuroconv.tools.neo import get_number_of_electrodes, get_number_of_segments -from neuroconv.utils import load_dict_from_file + +from ..setup_paths import ECEPHY_DATA_PATH, OUTPUT_PATH try: from parameterized import param, parameterized @@ -19,30 +17,9 @@ HAVE_PARAMETERIZED = True except ImportError: HAVE_PARAMETERIZED = False -# Load the configuration for the data tests -test_config_dict = load_dict_from_file(Path(__file__).parent.parent / "gin_test_config.json") - -# GIN dataset: https://gin.g-node.org/NeuralEnsemble/ephy_testing_data -if os.getenv("CI"): - LOCAL_PATH = Path(".") # Must be set to "." for CI - print("Running GIN tests on Github CI!") -else: - # Override LOCAL_PATH in the `gin_test_config.json` file to a point on your system that contains the dataset folder - # Use DANDIHub at hub.dandiarchive.org for open, free use of data found in the /shared/catalystneuro/ directory - LOCAL_PATH = Path(test_config_dict["LOCAL_PATH"]) - print("Running GIN tests locally!") -DATA_PATH = LOCAL_PATH / "ephy_testing_data" -HAVE_DATA = DATA_PATH.exists() - -if test_config_dict["SAVE_OUTPUTS"]: - OUTPUT_PATH = LOCAL_PATH / "example_nwb_output" - OUTPUT_PATH.mkdir(exist_ok=True) -else: - OUTPUT_PATH = Path(tempfile.mkdtemp()) + if not HAVE_PARAMETERIZED: pytest.fail("parameterized module is not installed! 
Please install (`pip install parameterized`).") -if not HAVE_DATA: - pytest.fail(f"No ephy_testing_data folder found in location: {DATA_PATH}!") def custom_name_func(testcase_func, param_num, param): @@ -59,7 +36,7 @@ class TestIcephysNwbConversions(unittest.TestCase): param( data_interface=AbfInterface, interface_kwargs=dict( - file_paths=[str(DATA_PATH / "axon" / "File_axon_1.abf")], + file_paths=[str(ECEPHY_DATA_PATH / "axon" / "File_axon_1.abf")], icephys_metadata={ "recording_sessions": [ {"abf_file_name": "File_axon_1.abf", "icephys_experiment_type": "voltage_clamp"} diff --git a/tests/test_on_data/setup_paths.py b/tests/test_on_data/setup_paths.py index 9554d27eb..3f7bf4123 100644 --- a/tests/test_on_data/setup_paths.py +++ b/tests/test_on_data/setup_paths.py @@ -1,6 +1,7 @@ import os import tempfile from pathlib import Path +from shutil import copy from neuroconv.utils import load_dict_from_file @@ -17,9 +18,17 @@ else: # Override LOCAL_PATH in the `gin_test_config.json` file to a point on your system that contains the dataset folder # Use DANDIHub at hub.dandiarchive.org for open, free use of data found in the /shared/catalystneuro/ directory - file_path = Path(__file__).parent / "gin_test_config.json" - assert file_path.exists(), f"File not found: {file_path}" - test_config_dict = load_dict_from_file(file_path) + test_config_path = Path(__file__).parent / "gin_test_config.json" + config_file_exists = test_config_path.exists() + if not config_file_exists: + + root = test_config_path.parent.parent + base_test_config_path = root / "base_gin_test_config.json" + + test_config_path.parent.mkdir(parents=True, exist_ok=True) + copy(src=base_test_config_path, dst=test_config_path) + + test_config_dict = load_dict_from_file(test_config_path) LOCAL_PATH = Path(test_config_dict["LOCAL_PATH"]) if test_config_dict["SAVE_OUTPUTS"]: From 2fc153adc566a7b53ee1e855407cc512c6d0d204 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 1 Nov 2024 08:58:51 -0600 Subject: [PATCH 10/28] [pre-commit.ci] pre-commit autoupdate (#1120) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Heberto Mayorquin --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b2c1d900c..6a6977633 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,7 +13,7 @@ repos: exclude: ^docs/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.9 + rev: v0.7.1 hooks: - id: ruff args: [ --fix ] From f50a6f55b213316e0845f5a956e41109812921b9 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Fri, 1 Nov 2024 15:15:15 -0600 Subject: [PATCH 11/28] Release 0.6.5 --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3fef5faff..bc816e350 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # Upcoming +# v0.6.5 (November 1, 2024) + ## Deprecations ## Bug Fixes From 419ab11104ff9e67d63f6400aacb3e12081c90d1 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Fri, 1 Nov 2024 15:19:18 -0600 Subject: [PATCH 12/28] bump version --- CHANGELOG.md | 8 ++++++++ pyproject.toml | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bc816e350..fa679434a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Upcoming +## Deprecations + +## Bug Fixes + +## Features + +## Improvements + # v0.6.5 
(November 1, 2024) ## Deprecations diff --git a/pyproject.toml b/pyproject.toml index 2ee338c8c..a83380467 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "neuroconv" -version = "0.6.5" +version = "0.6.6" description = "Convert data from proprietary formats to NWB format." readme = "README.md" authors = [ From cbf68d4c6d7a3a5c942586e228a90063fe161b22 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 7 Nov 2024 09:59:53 -0600 Subject: [PATCH 13/28] Change deprecated `get_schema_from_method_signature` to `get_json_schema_from_method_signature` (#1130) --- .../behavior/lightningpose/lightningposeconverter.py | 6 +++--- .../datainterfaces/ecephys/axona/axonadatainterface.py | 4 ++-- .../ecephys/blackrock/blackrockdatainterface.py | 6 +++--- .../ecephys/openephys/openephysbinarydatainterface.py | 4 ++-- .../ecephys/openephys/openephyssortingdatainterface.py | 4 ++-- .../datainterfaces/ecephys/spike2/spike2datainterface.py | 4 ++-- .../datainterfaces/ecephys/spikeglx/spikeglxconverter.py | 4 ++-- .../ecephys/spikeglx/spikeglxnidqinterface.py | 4 ++-- .../datainterfaces/icephys/baseicephysinterface.py | 4 ++-- .../datainterfaces/ophys/brukertiff/brukertiffconverter.py | 6 +++--- .../datainterfaces/ophys/miniscope/miniscopeconverter.py | 4 ++-- src/neuroconv/tools/testing/mock_interfaces.py | 6 +++--- src/neuroconv/utils/__init__.py | 2 +- .../test_get_json_schema_from_method_signature.py | 6 +++--- 14 files changed, 32 insertions(+), 32 deletions(-) diff --git a/src/neuroconv/datainterfaces/behavior/lightningpose/lightningposeconverter.py b/src/neuroconv/datainterfaces/behavior/lightningpose/lightningposeconverter.py index dee848f19..505aa144d 100644 --- a/src/neuroconv/datainterfaces/behavior/lightningpose/lightningposeconverter.py +++ b/src/neuroconv/datainterfaces/behavior/lightningpose/lightningposeconverter.py @@ -10,7 +10,7 @@ from neuroconv.utils import ( DeepDict, dict_deep_update, - get_schema_from_method_signature, + get_json_schema_from_method_signature, ) @@ -24,7 +24,7 @@ class LightningPoseConverter(NWBConverter): @classmethod def get_source_schema(cls): - return get_schema_from_method_signature(cls) + return get_json_schema_from_method_signature(cls) @validate_call def __init__( @@ -71,7 +71,7 @@ def __init__( self.data_interface_objects.update(dict(LabeledVideo=VideoInterface(file_paths=[labeled_video_file_path]))) def get_conversion_options_schema(self) -> dict: - conversion_options_schema = get_schema_from_method_signature( + conversion_options_schema = get_json_schema_from_method_signature( method=self.add_to_nwbfile, exclude=["nwbfile", "metadata"] ) diff --git a/src/neuroconv/datainterfaces/ecephys/axona/axonadatainterface.py b/src/neuroconv/datainterfaces/ecephys/axona/axonadatainterface.py index ba6adf4a1..3c8a1067c 100644 --- a/src/neuroconv/datainterfaces/ecephys/axona/axonadatainterface.py +++ b/src/neuroconv/datainterfaces/ecephys/axona/axonadatainterface.py @@ -12,7 +12,7 @@ from ..baserecordingextractorinterface import BaseRecordingExtractorInterface from ....basedatainterface import BaseDataInterface from ....tools.nwb_helpers import get_module -from ....utils import get_schema_from_method_signature +from ....utils import get_json_schema_from_method_signature class AxonaRecordingInterface(BaseRecordingExtractorInterface): @@ -186,7 +186,7 @@ class AxonaPositionDataInterface(BaseDataInterface): @classmethod def get_source_schema(cls) -> dict: - return 
get_schema_from_method_signature(cls.__init__) + return get_json_schema_from_method_signature(cls.__init__) def __init__(self, file_path: str): """ diff --git a/src/neuroconv/datainterfaces/ecephys/blackrock/blackrockdatainterface.py b/src/neuroconv/datainterfaces/ecephys/blackrock/blackrockdatainterface.py index e84719431..7e6e499d1 100644 --- a/src/neuroconv/datainterfaces/ecephys/blackrock/blackrockdatainterface.py +++ b/src/neuroconv/datainterfaces/ecephys/blackrock/blackrockdatainterface.py @@ -6,7 +6,7 @@ from .header_tools import _parse_nev_basic_header, _parse_nsx_basic_header from ..baserecordingextractorinterface import BaseRecordingExtractorInterface from ..basesortingextractorinterface import BaseSortingExtractorInterface -from ....utils import get_schema_from_method_signature +from ....utils import get_json_schema_from_method_signature class BlackrockRecordingInterface(BaseRecordingExtractorInterface): @@ -19,7 +19,7 @@ class BlackrockRecordingInterface(BaseRecordingExtractorInterface): @classmethod def get_source_schema(cls): - source_schema = get_schema_from_method_signature(method=cls.__init__, exclude=["block_index", "seg_index"]) + source_schema = get_json_schema_from_method_signature(method=cls.__init__, exclude=["block_index", "seg_index"]) source_schema["properties"]["file_path"][ "description" ] = "Path to the Blackrock file with suffix being .ns1, .ns2, .ns3, .ns4m .ns4, or .ns6." @@ -85,7 +85,7 @@ class BlackrockSortingInterface(BaseSortingExtractorInterface): @classmethod def get_source_schema(cls) -> dict: - metadata_schema = get_schema_from_method_signature(method=cls.__init__) + metadata_schema = get_json_schema_from_method_signature(method=cls.__init__) metadata_schema["additionalProperties"] = True metadata_schema["properties"]["file_path"].update(description="Path to Blackrock .nev file.") return metadata_schema diff --git a/src/neuroconv/datainterfaces/ecephys/openephys/openephysbinarydatainterface.py b/src/neuroconv/datainterfaces/ecephys/openephys/openephysbinarydatainterface.py index 371b96f94..ef67fde40 100644 --- a/src/neuroconv/datainterfaces/ecephys/openephys/openephysbinarydatainterface.py +++ b/src/neuroconv/datainterfaces/ecephys/openephys/openephysbinarydatainterface.py @@ -3,7 +3,7 @@ from pydantic import DirectoryPath from ..baserecordingextractorinterface import BaseRecordingExtractorInterface -from ....utils import get_schema_from_method_signature +from ....utils import get_json_schema_from_method_signature class OpenEphysBinaryRecordingInterface(BaseRecordingExtractorInterface): @@ -29,7 +29,7 @@ def get_stream_names(cls, folder_path: DirectoryPath) -> list[str]: @classmethod def get_source_schema(cls) -> dict: """Compile input schema for the RecordingExtractor.""" - source_schema = get_schema_from_method_signature( + source_schema = get_json_schema_from_method_signature( method=cls.__init__, exclude=["recording_id", "experiment_id", "stub_test"] ) source_schema["properties"]["folder_path"][ diff --git a/src/neuroconv/datainterfaces/ecephys/openephys/openephyssortingdatainterface.py b/src/neuroconv/datainterfaces/ecephys/openephys/openephyssortingdatainterface.py index 2d53e6331..ecf2067f1 100644 --- a/src/neuroconv/datainterfaces/ecephys/openephys/openephyssortingdatainterface.py +++ b/src/neuroconv/datainterfaces/ecephys/openephys/openephyssortingdatainterface.py @@ -1,7 +1,7 @@ from pydantic import DirectoryPath, validate_call from ..basesortingextractorinterface import BaseSortingExtractorInterface -from ....utils import 
get_schema_from_method_signature +from ....utils import get_json_schema_from_method_signature class OpenEphysSortingInterface(BaseSortingExtractorInterface): @@ -14,7 +14,7 @@ class OpenEphysSortingInterface(BaseSortingExtractorInterface): @classmethod def get_source_schema(cls) -> dict: """Compile input schema for the SortingExtractor.""" - metadata_schema = get_schema_from_method_signature( + metadata_schema = get_json_schema_from_method_signature( method=cls.__init__, exclude=["recording_id", "experiment_id"] ) metadata_schema["properties"]["folder_path"].update( diff --git a/src/neuroconv/datainterfaces/ecephys/spike2/spike2datainterface.py b/src/neuroconv/datainterfaces/ecephys/spike2/spike2datainterface.py index ccd98a369..bf0ddc860 100644 --- a/src/neuroconv/datainterfaces/ecephys/spike2/spike2datainterface.py +++ b/src/neuroconv/datainterfaces/ecephys/spike2/spike2datainterface.py @@ -4,7 +4,7 @@ from ..baserecordingextractorinterface import BaseRecordingExtractorInterface from ....tools import get_package -from ....utils import get_schema_from_method_signature +from ....utils import get_json_schema_from_method_signature def _test_sonpy_installation() -> None: @@ -29,7 +29,7 @@ class Spike2RecordingInterface(BaseRecordingExtractorInterface): @classmethod def get_source_schema(cls) -> dict: - source_schema = get_schema_from_method_signature(method=cls.__init__, exclude=["smrx_channel_ids"]) + source_schema = get_json_schema_from_method_signature(method=cls.__init__, exclude=["smrx_channel_ids"]) source_schema.update(additionalProperties=True) source_schema["properties"]["file_path"].update(description="Path to .smrx file.") return source_schema diff --git a/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxconverter.py b/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxconverter.py index 6aeb36cec..007c3177c 100644 --- a/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxconverter.py +++ b/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxconverter.py @@ -6,7 +6,7 @@ from .spikeglxdatainterface import SpikeGLXRecordingInterface from .spikeglxnidqinterface import SpikeGLXNIDQInterface from ....nwbconverter import ConverterPipe -from ....utils import get_schema_from_method_signature +from ....utils import get_json_schema_from_method_signature class SpikeGLXConverterPipe(ConverterPipe): @@ -23,7 +23,7 @@ class SpikeGLXConverterPipe(ConverterPipe): @classmethod def get_source_schema(cls): - source_schema = get_schema_from_method_signature(method=cls.__init__, exclude=["streams"]) + source_schema = get_json_schema_from_method_signature(method=cls.__init__, exclude=["streams"]) source_schema["properties"]["folder_path"]["description"] = "Path to the folder containing SpikeGLX streams." 
return source_schema diff --git a/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxnidqinterface.py b/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxnidqinterface.py index fab9e5b5f..3cf50080a 100644 --- a/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxnidqinterface.py +++ b/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxnidqinterface.py @@ -6,7 +6,7 @@ from .spikeglx_utils import get_session_start_time from ..baserecordingextractorinterface import BaseRecordingExtractorInterface from ....tools.signal_processing import get_rising_frames_from_ttl -from ....utils import get_schema_from_method_signature +from ....utils import get_json_schema_from_method_signature class SpikeGLXNIDQInterface(BaseRecordingExtractorInterface): @@ -22,7 +22,7 @@ class SpikeGLXNIDQInterface(BaseRecordingExtractorInterface): @classmethod def get_source_schema(cls) -> dict: - source_schema = get_schema_from_method_signature(method=cls.__init__, exclude=["x_pitch", "y_pitch"]) + source_schema = get_json_schema_from_method_signature(method=cls.__init__, exclude=["x_pitch", "y_pitch"]) source_schema["properties"]["file_path"]["description"] = "Path to SpikeGLX .nidq file." return source_schema diff --git a/src/neuroconv/datainterfaces/icephys/baseicephysinterface.py b/src/neuroconv/datainterfaces/icephys/baseicephysinterface.py index f8bad53d6..092ec1e36 100644 --- a/src/neuroconv/datainterfaces/icephys/baseicephysinterface.py +++ b/src/neuroconv/datainterfaces/icephys/baseicephysinterface.py @@ -7,9 +7,9 @@ from ...baseextractorinterface import BaseExtractorInterface from ...tools.nwb_helpers import make_nwbfile_from_metadata from ...utils import ( + get_json_schema_from_method_signature, get_metadata_schema_for_icephys, get_schema_from_hdmf_class, - get_schema_from_method_signature, ) @@ -22,7 +22,7 @@ class BaseIcephysInterface(BaseExtractorInterface): @classmethod def get_source_schema(cls) -> dict: - source_schema = get_schema_from_method_signature(method=cls.__init__, exclude=[]) + source_schema = get_json_schema_from_method_signature(method=cls.__init__, exclude=[]) return source_schema @validate_call diff --git a/src/neuroconv/datainterfaces/ophys/brukertiff/brukertiffconverter.py b/src/neuroconv/datainterfaces/ophys/brukertiff/brukertiffconverter.py index 86e8edc1f..2a67da720 100644 --- a/src/neuroconv/datainterfaces/ophys/brukertiff/brukertiffconverter.py +++ b/src/neuroconv/datainterfaces/ophys/brukertiff/brukertiffconverter.py @@ -9,7 +9,7 @@ ) from ....nwbconverter import NWBConverter from ....tools.nwb_helpers import make_or_load_nwbfile -from ....utils import get_schema_from_method_signature +from ....utils import get_json_schema_from_method_signature class BrukerTiffMultiPlaneConverter(NWBConverter): @@ -24,7 +24,7 @@ class BrukerTiffMultiPlaneConverter(NWBConverter): @classmethod def get_source_schema(cls): - source_schema = get_schema_from_method_signature(cls) + source_schema = get_json_schema_from_method_signature(cls) source_schema["properties"]["folder_path"][ "description" ] = "The folder that contains the Bruker TIF image files (.ome.tif) and configuration files (.xml, .env)." 
@@ -138,7 +138,7 @@ class BrukerTiffSinglePlaneConverter(NWBConverter): @classmethod def get_source_schema(cls): - return get_schema_from_method_signature(cls) + return get_json_schema_from_method_signature(cls) def get_conversion_options_schema(self): interface_name = list(self.data_interface_objects.keys())[0] diff --git a/src/neuroconv/datainterfaces/ophys/miniscope/miniscopeconverter.py b/src/neuroconv/datainterfaces/ophys/miniscope/miniscopeconverter.py index cfee8f027..d1a0fb701 100644 --- a/src/neuroconv/datainterfaces/ophys/miniscope/miniscopeconverter.py +++ b/src/neuroconv/datainterfaces/ophys/miniscope/miniscopeconverter.py @@ -6,7 +6,7 @@ from ... import MiniscopeBehaviorInterface, MiniscopeImagingInterface from ....nwbconverter import NWBConverter from ....tools.nwb_helpers import make_or_load_nwbfile -from ....utils import get_schema_from_method_signature +from ....utils import get_json_schema_from_method_signature class MiniscopeConverter(NWBConverter): @@ -19,7 +19,7 @@ class MiniscopeConverter(NWBConverter): @classmethod def get_source_schema(cls): - source_schema = get_schema_from_method_signature(cls) + source_schema = get_json_schema_from_method_signature(cls) source_schema["properties"]["folder_path"]["description"] = "The path to the main Miniscope folder." return source_schema diff --git a/src/neuroconv/tools/testing/mock_interfaces.py b/src/neuroconv/tools/testing/mock_interfaces.py index 04dc57250..4ba7bb639 100644 --- a/src/neuroconv/tools/testing/mock_interfaces.py +++ b/src/neuroconv/tools/testing/mock_interfaces.py @@ -20,7 +20,7 @@ from ...datainterfaces.ophys.basesegmentationextractorinterface import ( BaseSegmentationExtractorInterface, ) -from ...utils import ArrayType, get_schema_from_method_signature +from ...utils import ArrayType, get_json_schema_from_method_signature class MockBehaviorEventInterface(BaseTemporalAlignmentInterface): @@ -30,7 +30,7 @@ class MockBehaviorEventInterface(BaseTemporalAlignmentInterface): @classmethod def get_source_schema(cls) -> dict: - source_schema = get_schema_from_method_signature(method=cls.__init__, exclude=["event_times"]) + source_schema = get_json_schema_from_method_signature(method=cls.__init__, exclude=["event_times"]) source_schema["additionalProperties"] = True return source_schema @@ -74,7 +74,7 @@ class MockSpikeGLXNIDQInterface(SpikeGLXNIDQInterface): @classmethod def get_source_schema(cls) -> dict: - source_schema = get_schema_from_method_signature(method=cls.__init__, exclude=["ttl_times"]) + source_schema = get_json_schema_from_method_signature(method=cls.__init__, exclude=["ttl_times"]) source_schema["additionalProperties"] = True return source_schema diff --git a/src/neuroconv/utils/__init__.py b/src/neuroconv/utils/__init__.py index f59cf59c5..c0061a983 100644 --- a/src/neuroconv/utils/__init__.py +++ b/src/neuroconv/utils/__init__.py @@ -12,7 +12,7 @@ get_base_schema, get_metadata_schema_for_icephys, get_schema_from_hdmf_class, - get_schema_from_method_signature, + get_json_schema_from_method_signature, unroot_schema, get_json_schema_from_method_signature, ) diff --git a/tests/test_minimal/test_utils/test_get_json_schema_from_method_signature.py b/tests/test_minimal/test_utils/test_get_json_schema_from_method_signature.py index 10139f7e1..e6fe27e16 100644 --- a/tests/test_minimal/test_utils/test_get_json_schema_from_method_signature.py +++ b/tests/test_minimal/test_utils/test_get_json_schema_from_method_signature.py @@ -110,7 +110,7 @@ def basic_method( assert test_json_schema == expected_json_schema 
-def test_get_schema_from_method_signature_init(): +def test_get_json_schema_from_method_signature_init(): """Test that 'self' is automatically skipped.""" class TestClass: @@ -141,7 +141,7 @@ def __init__( assert test_json_schema == expected_json_schema -def test_get_schema_from_method_signature_class_static(): +def test_get_json_schema_from_method_signature_class_static(): """Ensuring that signature assembly prior to passing to Pydantic is not affected by bound or static methods.""" class TestClass: @@ -165,7 +165,7 @@ def test_static_method(integer: int, string: str, boolean: bool): assert test_json_schema == expected_json_schema -def test_get_schema_from_method_signature_class_method(): +def test_get_json_schema_from_method_signature_class_method(): """Test that 'cls' is automatically skipped.""" class TestClass: From 9448f95f1b92b4035b792a7d87571dedcd2fb044 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Mon, 11 Nov 2024 10:02:38 -0600 Subject: [PATCH 14/28] Remove deprecations and add docstring to `BaseImagingExtractorInterface` (#1126) --- CHANGELOG.md | 1 + .../behavior/fictrac/fictracdatainterface.py | 14 ------ .../ecephys/baselfpextractorinterface.py | 4 -- .../baserecordingextractorinterface.py | 4 -- .../ophys/baseimagingextractorinterface.py | 45 ++++++++++--------- src/neuroconv/tools/neo/neo.py | 36 --------------- .../tools/spikeinterface/spikeinterface.py | 38 ---------------- .../tools/testing/mock_interfaces.py | 4 +- .../test_baseimagingextractorinterface.py | 15 ------- 9 files changed, 28 insertions(+), 133 deletions(-) delete mode 100644 tests/test_ophys/test_baseimagingextractorinterface.py diff --git a/CHANGELOG.md b/CHANGELOG.md index fa679434a..75c6ea917 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Upcoming ## Deprecations +* Completely removed compression settings from most places[PR #1126](https://github.com/catalystneuro/neuroconv/pull/1126) ## Bug Fixes diff --git a/src/neuroconv/datainterfaces/behavior/fictrac/fictracdatainterface.py b/src/neuroconv/datainterfaces/behavior/fictrac/fictracdatainterface.py index 1b9686fd1..1d822f919 100644 --- a/src/neuroconv/datainterfaces/behavior/fictrac/fictracdatainterface.py +++ b/src/neuroconv/datainterfaces/behavior/fictrac/fictracdatainterface.py @@ -1,6 +1,5 @@ import json import re -import warnings from datetime import datetime, timezone from pathlib import Path from typing import Optional, Union @@ -210,8 +209,6 @@ def add_to_nwbfile( self, nwbfile: NWBFile, metadata: Optional[dict] = None, - compression: Optional[str] = None, # TODO: remove completely after 10/1/2024 - compression_opts: Optional[int] = None, # TODO: remove completely after 10/1/2024 ): """ Parameters @@ -223,17 +220,6 @@ def add_to_nwbfile( """ import pandas as pd - # TODO: remove completely after 10/1/2024 - if compression is not None or compression_opts is not None: - warnings.warn( - message=( - "Specifying compression methods and their options at the level of tool functions has been deprecated. " - "Please use the `configure_backend` tool function for this purpose." 
- ), - category=DeprecationWarning, - stacklevel=2, - ) - fictrac_data_df = pd.read_csv(self.file_path, sep=",", header=None, names=self.columns_in_dat_file) # Get the timestamps diff --git a/src/neuroconv/datainterfaces/ecephys/baselfpextractorinterface.py b/src/neuroconv/datainterfaces/ecephys/baselfpextractorinterface.py index 7ce6bb9e4..af16601bb 100644 --- a/src/neuroconv/datainterfaces/ecephys/baselfpextractorinterface.py +++ b/src/neuroconv/datainterfaces/ecephys/baselfpextractorinterface.py @@ -26,8 +26,6 @@ def add_to_nwbfile( starting_time: Optional[float] = None, write_as: Literal["raw", "lfp", "processed"] = "lfp", write_electrical_series: bool = True, - compression: Optional[str] = None, # TODO: remove completely after 10/1/2024 - compression_opts: Optional[int] = None, iterator_type: str = "v2", iterator_opts: Optional[dict] = None, ): @@ -38,8 +36,6 @@ def add_to_nwbfile( starting_time=starting_time, write_as=write_as, write_electrical_series=write_electrical_series, - compression=compression, - compression_opts=compression_opts, iterator_type=iterator_type, iterator_opts=iterator_opts, ) diff --git a/src/neuroconv/datainterfaces/ecephys/baserecordingextractorinterface.py b/src/neuroconv/datainterfaces/ecephys/baserecordingextractorinterface.py index e2c747378..6d0df14c1 100644 --- a/src/neuroconv/datainterfaces/ecephys/baserecordingextractorinterface.py +++ b/src/neuroconv/datainterfaces/ecephys/baserecordingextractorinterface.py @@ -308,8 +308,6 @@ def add_to_nwbfile( starting_time: Optional[float] = None, write_as: Literal["raw", "lfp", "processed"] = "raw", write_electrical_series: bool = True, - compression: Optional[str] = None, # TODO: remove completely after 10/1/2024 - compression_opts: Optional[int] = None, iterator_type: Optional[str] = "v2", iterator_opts: Optional[dict] = None, always_write_timestamps: bool = False, @@ -388,8 +386,6 @@ def add_to_nwbfile( write_as=write_as, write_electrical_series=write_electrical_series, es_key=self.es_key, - compression=compression, - compression_opts=compression_opts, iterator_type=iterator_type, iterator_opts=iterator_opts, always_write_timestamps=always_write_timestamps, diff --git a/src/neuroconv/datainterfaces/ophys/baseimagingextractorinterface.py b/src/neuroconv/datainterfaces/ophys/baseimagingextractorinterface.py index 5125af3cc..9f88b861f 100644 --- a/src/neuroconv/datainterfaces/ophys/baseimagingextractorinterface.py +++ b/src/neuroconv/datainterfaces/ophys/baseimagingextractorinterface.py @@ -1,6 +1,5 @@ """Author: Ben Dichter.""" -import warnings from typing import Literal, Optional import numpy as np @@ -46,17 +45,9 @@ def __init__( self.photon_series_type = photon_series_type def get_metadata_schema( - self, photon_series_type: Optional[Literal["OnePhotonSeries", "TwoPhotonSeries"]] = None + self, ) -> dict: - if photon_series_type is not None: - warnings.warn( - "The 'photon_series_type' argument is deprecated and will be removed in a future version. 
" - "Please set 'photon_series_type' during the initialization of the BaseImagingExtractorInterface instance.", - DeprecationWarning, - stacklevel=2, - ) - self.photon_series_type = photon_series_type metadata_schema = super().get_metadata_schema() metadata_schema["required"] = ["Ophys"] @@ -100,18 +91,9 @@ def get_metadata_schema( return metadata_schema def get_metadata( - self, photon_series_type: Optional[Literal["OnePhotonSeries", "TwoPhotonSeries"]] = None + self, ) -> DeepDict: - if photon_series_type is not None: - warnings.warn( - "The 'photon_series_type' argument is deprecated and will be removed in a future version. " - "Please set 'photon_series_type' during the initialization of the BaseImagingExtractorInterface instance.", - DeprecationWarning, - stacklevel=2, - ) - self.photon_series_type = photon_series_type - from ...tools.roiextractors import get_nwb_imaging_metadata metadata = super().get_metadata() @@ -147,6 +129,29 @@ def add_to_nwbfile( stub_test: bool = False, stub_frames: int = 100, ): + """ + Add imaging data to the NWB file + + Parameters + ---------- + nwbfile : NWBFile + The NWB file where the imaging data will be added. + metadata : dict, optional + Metadata for the NWBFile, by default None. + photon_series_type : {"TwoPhotonSeries", "OnePhotonSeries"}, optional + The type of photon series to be added, by default "TwoPhotonSeries". + photon_series_index : int, optional + The index of the photon series in the provided imaging data, by default 0. + parent_container : {"acquisition", "processing/ophys"}, optional + Specifies the parent container to which the photon series should be added, either as part of "acquisition" or + under the "processing/ophys" module, by default "acquisition". + stub_test : bool, optional + If True, only writes a small subset of frames for testing purposes, by default False. + stub_frames : int, optional + The number of frames to write when stub_test is True. Will use min(stub_frames, total_frames) to avoid + exceeding available frames, by default 100. + """ + from ...tools.roiextractors import add_imaging_to_nwbfile if stub_test: diff --git a/src/neuroconv/tools/neo/neo.py b/src/neuroconv/tools/neo/neo.py index 220c64de0..ccef706e5 100644 --- a/src/neuroconv/tools/neo/neo.py +++ b/src/neuroconv/tools/neo/neo.py @@ -214,7 +214,6 @@ def add_icephys_recordings( icephys_experiment_type: str = "voltage_clamp", stimulus_type: str = "not described", skip_electrodes: tuple[int] = (), - compression: Optional[str] = None, # TODO: remove completely after 10/1/2024 ): """ Add icephys recordings (stimulus/response pairs) to nwbfile object. @@ -230,16 +229,6 @@ def add_icephys_recordings( skip_electrodes : tuple, default: () Electrode IDs to skip. """ - # TODO: remove completely after 10/1/2024 - if compression is not None: - warn( - message=( - "Specifying compression methods and their options at the level of tool functions has been deprecated. " - "Please use the `configure_backend` tool function for this purpose." 
- ), - category=DeprecationWarning, - stacklevel=2, - ) n_segments = get_number_of_segments(neo_reader, block=0) @@ -380,7 +369,6 @@ def add_neo_to_nwb( neo_reader, nwbfile: pynwb.NWBFile, metadata: dict = None, - compression: Optional[str] = None, # TODO: remove completely after 10/1/2024 icephys_experiment_type: str = "voltage_clamp", stimulus_type: Optional[str] = None, skip_electrodes: tuple[int] = (), @@ -409,15 +397,6 @@ def add_neo_to_nwb( assert isinstance(nwbfile, pynwb.NWBFile), "'nwbfile' should be of type pynwb.NWBFile" # TODO: remove completely after 10/1/2024 - if compression is not None: - warn( - message=( - "Specifying compression methods and their options at the level of tool functions has been deprecated. " - "Please use the `configure_backend` tool function for this purpose." - ), - category=DeprecationWarning, - stacklevel=2, - ) add_device_from_metadata(nwbfile=nwbfile, modality="Icephys", metadata=metadata) @@ -443,7 +422,6 @@ def write_neo_to_nwb( overwrite: bool = False, nwbfile=None, metadata: dict = None, - compression: Optional[str] = None, # TODO: remove completely after 10/1/2024 icephys_experiment_type: Optional[str] = None, stimulus_type: Optional[str] = None, skip_electrodes: Optional[tuple] = (), @@ -499,9 +477,6 @@ def write_neo_to_nwb( Note that data intended to be added to the electrodes table of the NWBFile should be set as channel properties in the RecordingExtractor object. - compression: str (optional, defaults to "gzip") - Type of compression to use. Valid types are "gzip" and "lzf". - Set to None to disable all compression. icephys_experiment_type: str (optional) Type of Icephys experiment. Allowed types are: 'voltage_clamp', 'current_clamp' and 'izero'. If no value is passed, 'voltage_clamp' is used as default. @@ -518,17 +493,6 @@ def write_neo_to_nwb( assert save_path is None or nwbfile is None, "Either pass a save_path location, or nwbfile object, but not both!" - # TODO: remove completely after 10/1/2024 - if compression is not None: - warn( - message=( - "Specifying compression methods and their options at the level of tool functions has been deprecated. " - "Please use the `configure_backend` tool function for this purpose." 
- ), - category=DeprecationWarning, - stacklevel=2, - ) - if metadata is None: metadata = get_nwb_metadata(neo_reader=neo_reader) diff --git a/src/neuroconv/tools/spikeinterface/spikeinterface.py b/src/neuroconv/tools/spikeinterface/spikeinterface.py index 1be86862a..5aa3c8925 100644 --- a/src/neuroconv/tools/spikeinterface/spikeinterface.py +++ b/src/neuroconv/tools/spikeinterface/spikeinterface.py @@ -749,8 +749,6 @@ def add_electrical_series( write_as: Literal["raw", "processed", "lfp"] = "raw", es_key: str = None, write_scaled: bool = False, - compression: Optional[str] = None, - compression_opts: Optional[int] = None, iterator_type: Optional[str] = "v2", iterator_opts: Optional[dict] = None, ): @@ -772,8 +770,6 @@ def add_electrical_series( write_as=write_as, es_key=es_key, write_scaled=write_scaled, - compression=compression, - compression_opts=compression_opts, iterator_type=iterator_type, iterator_opts=iterator_opts, ) @@ -810,8 +806,6 @@ def add_electrical_series_to_nwbfile( write_as: Literal["raw", "processed", "lfp"] = "raw", es_key: str = None, write_scaled: bool = False, - compression: Optional[str] = None, - compression_opts: Optional[int] = None, iterator_type: Optional[str] = "v2", iterator_opts: Optional[dict] = None, always_write_timestamps: bool = False, @@ -847,7 +841,6 @@ def add_electrical_series_to_nwbfile( write_scaled : bool, default: False If True, writes the traces in uV with the right conversion. If False , the data is stored as it is and the right conversions factors are added to the nwbfile. - Only applies to compression="gzip". Controls the level of the GZIP. iterator_type: {"v2", None}, default: 'v2' The type of DataChunkIterator to use. 'v1' is the original DataChunkIterator of the hdmf data_utils. @@ -868,16 +861,6 @@ def add_electrical_series_to_nwbfile( Missing keys in an element of metadata['Ecephys']['ElectrodeGroup'] will be auto-populated with defaults whenever possible. """ - # TODO: remove completely after 10/1/2024 - if compression is not None or compression_opts is not None: - warnings.warn( - message=( - "Specifying compression methods and their options at the level of tool functions has been deprecated. " - "Please use the `configure_backend` tool function for this purpose." 
- ), - category=DeprecationWarning, - stacklevel=2, - ) assert write_as in [ "raw", @@ -1042,8 +1025,6 @@ def add_recording( es_key: Optional[str] = None, write_electrical_series: bool = True, write_scaled: bool = False, - compression: Optional[str] = "gzip", - compression_opts: Optional[int] = None, iterator_type: str = "v2", iterator_opts: Optional[dict] = None, ): @@ -1065,8 +1046,6 @@ def add_recording( es_key=es_key, write_electrical_series=write_electrical_series, write_scaled=write_scaled, - compression=compression, - compression_opts=compression_opts, iterator_type=iterator_type, iterator_opts=iterator_opts, ) @@ -1081,8 +1060,6 @@ def add_recording_to_nwbfile( es_key: Optional[str] = None, write_electrical_series: bool = True, write_scaled: bool = False, - compression: Optional[str] = "gzip", - compression_opts: Optional[int] = None, iterator_type: str = "v2", iterator_opts: Optional[dict] = None, always_write_timestamps: bool = False, @@ -1163,8 +1140,6 @@ def add_recording_to_nwbfile( write_as=write_as, es_key=es_key, write_scaled=write_scaled, - compression=compression, - compression_opts=compression_opts, iterator_type=iterator_type, iterator_opts=iterator_opts, always_write_timestamps=always_write_timestamps, @@ -1183,8 +1158,6 @@ def write_recording( es_key: Optional[str] = None, write_electrical_series: bool = True, write_scaled: bool = False, - compression: Optional[str] = "gzip", - compression_opts: Optional[int] = None, iterator_type: Optional[str] = "v2", iterator_opts: Optional[dict] = None, ): @@ -1209,8 +1182,6 @@ def write_recording( es_key=es_key, write_electrical_series=write_electrical_series, write_scaled=write_scaled, - compression=compression, - compression_opts=compression_opts, iterator_type=iterator_type, iterator_opts=iterator_opts, ) @@ -1228,8 +1199,6 @@ def write_recording_to_nwbfile( es_key: Optional[str] = None, write_electrical_series: bool = True, write_scaled: bool = False, - compression: Optional[str] = "gzip", - compression_opts: Optional[int] = None, iterator_type: Optional[str] = "v2", iterator_opts: Optional[dict] = None, ) -> pynwb.NWBFile: @@ -1303,11 +1272,6 @@ def write_recording_to_nwbfile( and electrodes are written to NWB. write_scaled: bool, default: True If True, writes the scaled traces (return_scaled=True) - compression: {None, 'gzip', 'lzp'}, default: 'gzip' - Type of compression to use. Set to None to disable all compression. - To use the `configure_backend` function, you should set this to None. - compression_opts: int, optional, default: 4 - Only applies to compression="gzip". Controls the level of the GZIP. iterator_type: {"v2", "v1", None} The type of DataChunkIterator to use. 'v1' is the original DataChunkIterator of the hdmf data_utils. 
@@ -1348,8 +1312,6 @@ def write_recording_to_nwbfile( es_key=es_key, write_electrical_series=write_electrical_series, write_scaled=write_scaled, - compression=compression, - compression_opts=compression_opts, iterator_type=iterator_type, iterator_opts=iterator_opts, ) diff --git a/src/neuroconv/tools/testing/mock_interfaces.py b/src/neuroconv/tools/testing/mock_interfaces.py index 4ba7bb639..44d1adf61 100644 --- a/src/neuroconv/tools/testing/mock_interfaces.py +++ b/src/neuroconv/tools/testing/mock_interfaces.py @@ -265,9 +265,9 @@ def __init__( self.verbose = verbose self.photon_series_type = photon_series_type - def get_metadata(self, photon_series_type: Optional[Literal["OnePhotonSeries", "TwoPhotonSeries"]] = None) -> dict: + def get_metadata(self) -> dict: session_start_time = datetime.now().astimezone() - metadata = super().get_metadata(photon_series_type=photon_series_type) + metadata = super().get_metadata() metadata["NWBFile"]["session_start_time"] = session_start_time return metadata diff --git a/tests/test_ophys/test_baseimagingextractorinterface.py b/tests/test_ophys/test_baseimagingextractorinterface.py deleted file mode 100644 index 863a978d2..000000000 --- a/tests/test_ophys/test_baseimagingextractorinterface.py +++ /dev/null @@ -1,15 +0,0 @@ -from hdmf.testing import TestCase - -from neuroconv.tools.testing.mock_interfaces import MockImagingInterface - - -class TestBaseImagingExtractorInterface(TestCase): - def setUp(self): - self.mock_imaging_interface = MockImagingInterface() - - def test_photon_series_type_warning_triggered_in_get_metadata(self): - with self.assertWarnsWith( - warn_type=DeprecationWarning, - exc_msg="The 'photon_series_type' argument is deprecated and will be removed in a future version. Please set 'photon_series_type' during the initialization of the BaseImagingExtractorInterface instance.", - ): - self.mock_imaging_interface.get_metadata(photon_series_type="TwoPhotonSeries") From 6960872de0b11f9f04d4f0efecbec4aa9c010c12 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Mon, 11 Nov 2024 14:17:24 -0600 Subject: [PATCH 15/28] Add `always_write_timestamps` conversion option to imaging interfaces (#1125) --- CHANGELOG.md | 3 +- pyproject.toml | 26 +++++++------- .../behavior/video/videodatainterface.py | 13 ------- .../ophys/baseimagingextractorinterface.py | 2 ++ .../tools/roiextractors/roiextractors.py | 34 +++++++++++++++---- .../tools/testing/mock_interfaces.py | 2 ++ tests/test_ophys/test_ophys_interfaces.py | 14 +++++++- 7 files changed, 59 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 75c6ea917..a06ddf300 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ ## Bug Fixes ## Features +* Imaging interfaces have a new conversion option `always_write_timestamps` that can be used to force writing timestamps even if neuroconv's heuristics indicates regular sampling rate [PR #1125](https://github.com/catalystneuro/neuroconv/pull/1125) ## Improvements @@ -46,7 +47,7 @@ * Added automated EFS volume creation and mounting to the `submit_aws_job` helper function. [PR #1018](https://github.com/catalystneuro/neuroconv/pull/1018) * Added a mock for segmentation extractors interfaces in ophys: `MockSegmentationInterface` [PR #1067](https://github.com/catalystneuro/neuroconv/pull/1067) * Added a `MockSortingInterface` for testing purposes. 
[PR #1065](https://github.com/catalystneuro/neuroconv/pull/1065) -* BaseRecordingInterfaces have a new conversion options `always_write_timestamps` that ca be used to force writing timestamps even if neuroconv heuristic indicates regular sampling rate [PR #1091](https://github.com/catalystneuro/neuroconv/pull/1091) +* BaseRecordingInterfaces have a new conversion options `always_write_timestamps` that can be used to force writing timestamps even if neuroconv heuristic indicates regular sampling rate [PR #1091](https://github.com/catalystneuro/neuroconv/pull/1091) ## Improvements diff --git a/pyproject.toml b/pyproject.toml index a83380467..5efd432f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -270,50 +270,50 @@ icephys = [ ## Ophys brukertiff = [ - "roiextractors>=0.5.7", + "roiextractors>=0.5.10", "tifffile>=2023.3.21", ] caiman = [ - "roiextractors>=0.5.7", + "roiextractors>=0.5.10", ] cnmfe = [ - "roiextractors>=0.5.7", + "roiextractors>=0.5.10", ] extract = [ - "roiextractors>=0.5.7", + "roiextractors>=0.5.10", ] hdf5 = [ - "roiextractors>=0.5.7", + "roiextractors>=0.5.10", ] micromanagertiff = [ - "roiextractors>=0.5.7", + "roiextractors>=0.5.10", "tifffile>=2023.3.21", ] miniscope = [ "natsort>=8.3.1", "ndx-miniscope>=0.5.1", - "roiextractors>=0.5.7", + "roiextractors>=0.5.10", ] sbx = [ - "roiextractors>=0.5.7", + "roiextractors>=0.5.10", ] scanimage = [ - "roiextractors>=0.5.7", + "roiextractors>=0.5.10", "scanimage-tiff-reader>=1.4.1", ] sima = [ - "roiextractors>=0.5.7", + "roiextractors>=0.5.10", ] suite2p = [ - "roiextractors>=0.5.7", + "roiextractors>=0.5.10", ] tdt_fp = [ "ndx-fiber-photometry", - "roiextractors>=0.5.7", + "roiextractors>=0.5.10", "tdt", ] tiff = [ - "roiextractors>=0.5.7", + "roiextractors>=0.5.9", "tiffile>=2018.10.18", ] ophys = [ diff --git a/src/neuroconv/datainterfaces/behavior/video/videodatainterface.py b/src/neuroconv/datainterfaces/behavior/video/videodatainterface.py index a544f9c27..aaa875f3e 100644 --- a/src/neuroconv/datainterfaces/behavior/video/videodatainterface.py +++ b/src/neuroconv/datainterfaces/behavior/video/videodatainterface.py @@ -269,8 +269,6 @@ def add_to_nwbfile( chunk_data: bool = True, module_name: Optional[str] = None, module_description: Optional[str] = None, - compression: Optional[str] = "gzip", - compression_options: Optional[int] = None, ): """ Convert the video data files to :py:class:`~pynwb.image.ImageSeries` and write them in the @@ -431,17 +429,6 @@ def add_to_nwbfile( pbar.update(1) iterable = video - # TODO: remove completely after 03/1/2024 - if compression is not None or compression_options is not None: - warnings.warn( - message=( - "Specifying compression methods and their options for this interface has been deprecated. " - "Please use the `configure_backend` tool function for this purpose." 
- ), - category=DeprecationWarning, - stacklevel=2, - ) - image_series_kwargs.update(data=iterable) if timing_type == "starting_time and rate": diff --git a/src/neuroconv/datainterfaces/ophys/baseimagingextractorinterface.py b/src/neuroconv/datainterfaces/ophys/baseimagingextractorinterface.py index 9f88b861f..0019b8bd7 100644 --- a/src/neuroconv/datainterfaces/ophys/baseimagingextractorinterface.py +++ b/src/neuroconv/datainterfaces/ophys/baseimagingextractorinterface.py @@ -128,6 +128,7 @@ def add_to_nwbfile( parent_container: Literal["acquisition", "processing/ophys"] = "acquisition", stub_test: bool = False, stub_frames: int = 100, + always_write_timestamps: bool = False, ): """ Add imaging data to the NWB file @@ -167,4 +168,5 @@ def add_to_nwbfile( photon_series_type=photon_series_type, photon_series_index=photon_series_index, parent_container=parent_container, + always_write_timestamps=always_write_timestamps, ) diff --git a/src/neuroconv/tools/roiextractors/roiextractors.py b/src/neuroconv/tools/roiextractors/roiextractors.py index f28631c77..27d3b5f9c 100644 --- a/src/neuroconv/tools/roiextractors/roiextractors.py +++ b/src/neuroconv/tools/roiextractors/roiextractors.py @@ -445,6 +445,7 @@ def add_photon_series_to_nwbfile( parent_container: Literal["acquisition", "processing/ophys"] = "acquisition", iterator_type: Optional[str] = "v2", iterator_options: Optional[dict] = None, + always_write_timestamps: bool = False, ) -> NWBFile: """ Auxiliary static method for nwbextractor. @@ -472,6 +473,11 @@ def add_photon_series_to_nwbfile( iterator_type: str, default: 'v2' The type of iterator to use when adding the photon series to the NWB file. iterator_options: dict, optional + always_write_timestamps : bool, default: False + Set to True to always write timestamps. + By default (False), the function checks if the timestamps are uniformly sampled, and if so, stores the data + using a regular sampling rate instead of explicit timestamps. If set to True, timestamps will be written + explicitly, regardless of whether the sampling rate is uniform. 
Returns ------- @@ -530,16 +536,23 @@ def add_photon_series_to_nwbfile( photon_series_kwargs.update(dimension=imaging.get_image_size()) # Add timestamps or rate - if imaging.has_time_vector(): + if always_write_timestamps: timestamps = imaging.frame_to_time(np.arange(imaging.get_num_frames())) - estimated_rate = calculate_regular_series_rate(series=timestamps) + photon_series_kwargs.update(timestamps=timestamps) + else: + imaging_has_timestamps = imaging.has_time_vector() + if imaging_has_timestamps: + timestamps = imaging.frame_to_time(np.arange(imaging.get_num_frames())) + estimated_rate = calculate_regular_series_rate(series=timestamps) + starting_time = timestamps[0] + else: + estimated_rate = float(imaging.get_sampling_frequency()) + starting_time = 0.0 + if estimated_rate: - photon_series_kwargs.update(starting_time=timestamps[0], rate=estimated_rate) + photon_series_kwargs.update(rate=estimated_rate, starting_time=starting_time) else: - photon_series_kwargs.update(timestamps=timestamps, rate=None) - else: - rate = float(imaging.get_sampling_frequency()) - photon_series_kwargs.update(rate=rate) + photon_series_kwargs.update(timestamps=timestamps) # Add the photon series to the nwbfile (either as OnePhotonSeries or TwoPhotonSeries) photon_series = dict( @@ -682,6 +695,7 @@ def add_imaging_to_nwbfile( iterator_type: Optional[str] = "v2", iterator_options: Optional[dict] = None, parent_container: Literal["acquisition", "processing/ophys"] = "acquisition", + always_write_timestamps: bool = False, ) -> NWBFile: """ Add imaging data from an ImagingExtractor object to an NWBFile. @@ -705,6 +719,11 @@ def add_imaging_to_nwbfile( parent_container : {"acquisition", "processing/ophys"}, optional Specifies the parent container to which the photon series should be added, either as part of "acquisition" or under the "processing/ophys" module, by default "acquisition". + always_write_timestamps : bool, default: False + Set to True to always write timestamps. + By default (False), the function checks if the timestamps are uniformly sampled, and if so, stores the data + using a regular sampling rate instead of explicit timestamps. If set to True, timestamps will be written + explicitly, regardless of whether the sampling rate is uniform. 
Returns ------- @@ -722,6 +741,7 @@ def add_imaging_to_nwbfile( iterator_type=iterator_type, iterator_options=iterator_options, parent_container=parent_container, + always_write_timestamps=always_write_timestamps, ) return nwbfile diff --git a/src/neuroconv/tools/testing/mock_interfaces.py b/src/neuroconv/tools/testing/mock_interfaces.py index 44d1adf61..dd3ec12c2 100644 --- a/src/neuroconv/tools/testing/mock_interfaces.py +++ b/src/neuroconv/tools/testing/mock_interfaces.py @@ -260,6 +260,7 @@ def __init__( sampling_frequency=sampling_frequency, dtype=dtype, verbose=verbose, + seed=seed, ) self.verbose = verbose @@ -334,6 +335,7 @@ def __init__( has_deconvolved_signal=has_deconvolved_signal, has_neuropil_signal=has_neuropil_signal, verbose=verbose, + seed=seed, ) def get_metadata(self) -> dict: diff --git a/tests/test_ophys/test_ophys_interfaces.py b/tests/test_ophys/test_ophys_interfaces.py index 4381faf8b..3ea329e5f 100644 --- a/tests/test_ophys/test_ophys_interfaces.py +++ b/tests/test_ophys/test_ophys_interfaces.py @@ -1,3 +1,5 @@ +import numpy as np + from neuroconv.tools.testing.data_interface_mixins import ( ImagingExtractorInterfaceTestMixin, SegmentationExtractorInterfaceTestMixin, @@ -12,7 +14,17 @@ class TestMockImagingInterface(ImagingExtractorInterfaceTestMixin): data_interface_cls = MockImagingInterface interface_kwargs = dict() - # TODO: fix this by setting a seed on the dummy imaging extractor + def test_always_write_timestamps(self, setup_interface): + # By default the MockImagingInterface has a uniform sampling rate + + nwbfile = self.interface.create_nwbfile(always_write_timestamps=True) + two_photon_series = nwbfile.acquisition["TwoPhotonSeries"] + imaging = self.interface.imaging_extractor + expected_timestamps = imaging.frame_to_time(np.arange(imaging.get_num_frames())) + + np.testing.assert_array_equal(two_photon_series.timestamps[:], expected_timestamps) + + # Remove this after roiextractors 0.5.10 is released def test_all_conversion_checks(self): pass From e3cde1f38d9de4f970ffdb1ffd2f73078bb04e0b Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 14 Nov 2024 07:47:34 -0600 Subject: [PATCH 16/28] Support datetime in conversion options (#1139) --- CHANGELOG.md | 1 + src/neuroconv/basedatainterface.py | 4 ++- src/neuroconv/nwbconverter.py | 29 ++++++++++++------ .../tools/testing/mock_interfaces.py | 21 +++++++++++++ src/neuroconv/utils/json_schema.py | 17 +++++++++++ .../test_minimal/test_interface_validation.py | 30 +++++++++++++++++++ 6 files changed, 92 insertions(+), 10 deletions(-) create mode 100644 tests/test_minimal/test_interface_validation.py diff --git a/CHANGELOG.md b/CHANGELOG.md index a06ddf300..cdc70223f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ * Completely removed compression settings from most places[PR #1126](https://github.com/catalystneuro/neuroconv/pull/1126) ## Bug Fixes +* datetime objects now can be validated as conversion options [#1139](https://github.com/catalystneuro/neuroconv/pull/1126) ## Features * Imaging interfaces have a new conversion option `always_write_timestamps` that can be used to force writing timestamps even if neuroconv's heuristics indicates regular sampling rate [PR #1125](https://github.com/catalystneuro/neuroconv/pull/1125) diff --git a/src/neuroconv/basedatainterface.py b/src/neuroconv/basedatainterface.py index adcec89b5..272abbd0c 100644 --- a/src/neuroconv/basedatainterface.py +++ b/src/neuroconv/basedatainterface.py @@ -126,7 +126,7 @@ def create_nwbfile(self, metadata: Optional[dict] 
= None, **conversion_options) return nwbfile @abstractmethod - def add_to_nwbfile(self, nwbfile: NWBFile, **conversion_options) -> None: + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict], **conversion_options) -> None: """ Define a protocol for mapping the data from this interface to NWB neurodata objects. @@ -136,6 +136,8 @@ def add_to_nwbfile(self, nwbfile: NWBFile, **conversion_options) -> None: ---------- nwbfile : pynwb.NWBFile The in-memory object to add the data to. + metadata : dict + Metadata dictionary with information used to create the NWBFile. **conversion_options Additional keyword arguments to pass to the `.add_to_nwbfile` method. """ diff --git a/src/neuroconv/nwbconverter.py b/src/neuroconv/nwbconverter.py index 1f3e7c9f8..fe1b09915 100644 --- a/src/neuroconv/nwbconverter.py +++ b/src/neuroconv/nwbconverter.py @@ -29,7 +29,11 @@ unroot_schema, ) from .utils.dict import DeepDict -from .utils.json_schema import _NWBMetaDataEncoder, _NWBSourceDataEncoder +from .utils.json_schema import ( + _NWBConversionOptionsEncoder, + _NWBMetaDataEncoder, + _NWBSourceDataEncoder, +) class NWBConverter: @@ -63,11 +67,10 @@ def validate_source(cls, source_data: dict[str, dict], verbose: bool = True): def _validate_source_data(self, source_data: dict[str, dict], verbose: bool = True): + # We do this to ensure that python objects are in string format for the JSON schema encoder = _NWBSourceDataEncoder() - # The encoder produces a serialized object, so we deserialized it for comparison - - serialized_source_data = encoder.encode(source_data) - decoded_source_data = json.loads(serialized_source_data) + encoded_source_data = encoder.encode(source_data) + decoded_source_data = json.loads(encoded_source_data) validate(instance=decoded_source_data, schema=self.get_source_schema()) if verbose: @@ -106,9 +109,10 @@ def get_metadata(self) -> DeepDict: def validate_metadata(self, metadata: dict[str, dict], append_mode: bool = False): """Validate metadata against Converter metadata_schema.""" encoder = _NWBMetaDataEncoder() - # The encoder produces a serialized object, so we deserialized it for comparison - serialized_metadata = encoder.encode(metadata) - decoded_metadata = json.loads(serialized_metadata) + + # We do this to ensure that python objects are in string format for the JSON schema + encoded_metadta = encoder.encode(metadata) + decoded_metadata = json.loads(encoded_metadta) metadata_schema = self.get_metadata_schema() if append_mode: @@ -138,7 +142,14 @@ def get_conversion_options_schema(self) -> dict: def validate_conversion_options(self, conversion_options: dict[str, dict]): """Validate conversion_options against Converter conversion_options_schema.""" - validate(instance=conversion_options or {}, schema=self.get_conversion_options_schema()) + + conversion_options = conversion_options or dict() + + # We do this to ensure that python objects are in string format for the JSON schema + encoded_conversion_options = _NWBConversionOptionsEncoder().encode(conversion_options) + decoded_conversion_options = json.loads(encoded_conversion_options) + + validate(instance=decoded_conversion_options, schema=self.get_conversion_options_schema()) if self.verbose: print("conversion_options is valid!") diff --git a/src/neuroconv/tools/testing/mock_interfaces.py b/src/neuroconv/tools/testing/mock_interfaces.py index dd3ec12c2..0652284e7 100644 --- a/src/neuroconv/tools/testing/mock_interfaces.py +++ b/src/neuroconv/tools/testing/mock_interfaces.py @@ -6,6 +6,7 @@ from pynwb.base import 
DynamicTable from .mock_ttl_signals import generate_mock_ttl_signal +from ...basedatainterface import BaseDataInterface from ...basetemporalalignmentinterface import BaseTemporalAlignmentInterface from ...datainterfaces import SpikeGLXNIDQInterface from ...datainterfaces.ecephys.baserecordingextractorinterface import ( @@ -23,6 +24,26 @@ from ...utils import ArrayType, get_json_schema_from_method_signature +class MockInterface(BaseDataInterface): + """ + A mock interface for testing basic command passing without side effects. + """ + + def __init__(self, verbose: bool = False, **source_data): + + super().__init__(verbose=verbose, **source_data) + + def get_metadata(self) -> dict: + metadata = super().get_metadata() + session_start_time = datetime.now().astimezone() + metadata["NWBFile"]["session_start_time"] = session_start_time + return metadata + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict], **conversion_options): + + return None + + class MockBehaviorEventInterface(BaseTemporalAlignmentInterface): """ A mock behavior event interface for testing purposes. diff --git a/src/neuroconv/utils/json_schema.py b/src/neuroconv/utils/json_schema.py index 182558b98..07dc3321f 100644 --- a/src/neuroconv/utils/json_schema.py +++ b/src/neuroconv/utils/json_schema.py @@ -60,6 +60,23 @@ def default(self, obj): return super().default(obj) +class _NWBConversionOptionsEncoder(_NWBMetaDataEncoder): + """ + Custom JSON encoder for conversion options of the data interfaces and converters (i.e. kwargs). + + This encoder extends the default JSONEncoder class and provides custom serialization + for certain data types commonly used in interface source data. + """ + + def default(self, obj): + + # Over-write behaviors for Paths + if isinstance(obj, Path): + return str(obj) + + return super().default(obj) + + def get_base_schema( tag: Optional[str] = None, root: bool = False, diff --git a/tests/test_minimal/test_interface_validation.py b/tests/test_minimal/test_interface_validation.py new file mode 100644 index 000000000..1bc409b06 --- /dev/null +++ b/tests/test_minimal/test_interface_validation.py @@ -0,0 +1,30 @@ +from datetime import datetime +from typing import Optional + +from pynwb import NWBFile + +from neuroconv import ConverterPipe +from neuroconv.tools.testing.mock_interfaces import ( + MockInterface, +) + + +def test_conversion_options_validation(tmp_path): + + class InterfaceWithDateTimeConversionOptions(MockInterface): + "class for testing how a file with datetime object is validated" + + def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict], datetime_option: datetime): + pass + + interface = InterfaceWithDateTimeConversionOptions() + + nwbfile_path = tmp_path / "interface_test.nwb" + interface.run_conversion(nwbfile_path=nwbfile_path, datetime_option=datetime.now(), overwrite=True) + + data_interfaces = {"InterfaceWithDateTimeConversionOptions": interface} + conversion_options = {"InterfaceWithDateTimeConversionOptions": {"datetime_option": datetime.now()}} + converter = ConverterPipe(data_interfaces=data_interfaces) + + nwbfile_path = tmp_path / "converter_test.nwb" + converter.run_conversion(nwbfile_path=nwbfile_path, overwrite=True, conversion_options=conversion_options) From 56673dddd246f806ec2d7ee1911d86fcd21414ae Mon Sep 17 00:00:00 2001 From: Paul Adkisson Date: Fri, 15 Nov 2024 06:52:44 +1100 Subject: [PATCH 17/28] Added CSV support to DeepLabCutInterface (#1140) --- CHANGELOG.md | 1 + .../behavior/deeplabcut.rst | 5 +- 
.../behavior/deeplabcut/_dlc_utils.py | 41 ++++-------- .../deeplabcut/deeplabcutdatainterface.py | 30 +++++---- .../tools/testing/data_interface_mixins.py | 24 ------- .../behavior/test_behavior_interfaces.py | 65 ++++++++++++++++--- 6 files changed, 91 insertions(+), 75 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cdc70223f..0545001d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ ## Features * Imaging interfaces have a new conversion option `always_write_timestamps` that can be used to force writing timestamps even if neuroconv's heuristics indicates regular sampling rate [PR #1125](https://github.com/catalystneuro/neuroconv/pull/1125) +* Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140) ## Improvements diff --git a/docs/conversion_examples_gallery/behavior/deeplabcut.rst b/docs/conversion_examples_gallery/behavior/deeplabcut.rst index c20dd057d..64201ea72 100644 --- a/docs/conversion_examples_gallery/behavior/deeplabcut.rst +++ b/docs/conversion_examples_gallery/behavior/deeplabcut.rst @@ -8,6 +8,7 @@ Install NeuroConv with the additional dependencies necessary for reading DeepLab pip install "neuroconv[deeplabcut]" Convert DeepLabCut pose estimation data to NWB using :py:class:`~neuroconv.datainterfaces.behavior.deeplabcut.deeplabcutdatainterface.DeepLabCutInterface`. +This interface supports both .h5 and .csv output files from DeepLabCut. .. code-block:: python @@ -16,8 +17,8 @@ Convert DeepLabCut pose estimation data to NWB using :py:class:`~neuroconv.datai >>> from pathlib import Path >>> from neuroconv.datainterfaces import DeepLabCutInterface - >>> file_path = BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5" - >>> config_file_path = BEHAVIOR_DATA_PATH / "DLC" / "config.yaml" + >>> file_path = BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5" + >>> config_file_path = BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml" >>> interface = DeepLabCutInterface(file_path=file_path, config_file_path=config_file_path, subject_name="ind1", verbose=False) diff --git a/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py b/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py index 9e368fb39..5d1224e85 100644 --- a/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py +++ b/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py @@ -251,21 +251,6 @@ def _get_video_info_from_config_file(config_file_path: Path, vidname: str): return video_file_path, image_shape -def _get_pes_args( - *, - h5file: Path, - individual_name: str, -): - h5file = Path(h5file) - - _, scorer = h5file.stem.split("DLC") - scorer = "DLC" + scorer - - df = _ensure_individuals_in_header(pd.read_hdf(h5file), individual_name) - - return scorer, df - - def _write_pes_to_nwbfile( nwbfile, animal, @@ -339,23 +324,23 @@ def _write_pes_to_nwbfile( return nwbfile -def add_subject_to_nwbfile( +def _add_subject_to_nwbfile( nwbfile: NWBFile, - h5file: FilePath, + file_path: FilePath, individual_name: str, config_file: Optional[FilePath] = None, timestamps: Optional[Union[list, np.ndarray]] = None, pose_estimation_container_kwargs: Optional[dict] = None, ) -> NWBFile: """ - Given the subject name, add the DLC .h5 file to an in-memory NWBFile object. + Given the subject name, add the DLC output file (.h5 or .csv) to an in-memory NWBFile object. 
Parameters ---------- nwbfile : pynwb.NWBFile The in-memory nwbfile object to which the subject specific pose estimation series will be added. - h5file : str or path - Path to the DeepLabCut .h5 output file. + file_path : str or path + Path to the DeepLabCut .h5 or .csv output file. individual_name : str Name of the subject (whose pose is predicted) for single-animal DLC project. For multi-animal projects, the names from the DLC project will be used directly. @@ -371,18 +356,18 @@ def add_subject_to_nwbfile( nwbfile : pynwb.NWBFile nwbfile with pes written in the behavior module """ - h5file = Path(h5file) - - if "DLC" not in h5file.name or not h5file.suffix == ".h5": - raise IOError("The file passed in is not a DeepLabCut h5 data file.") + file_path = Path(file_path) - video_name, scorer = h5file.stem.split("DLC") + video_name, scorer = file_path.stem.split("DLC") scorer = "DLC" + scorer # TODO probably could be read directly with h5py # This requires pytables - data_frame_from_hdf5 = pd.read_hdf(h5file) - df = _ensure_individuals_in_header(data_frame_from_hdf5, individual_name) + if ".h5" in file_path.suffixes: + df = pd.read_hdf(file_path) + elif ".csv" in file_path.suffixes: + df = pd.read_csv(file_path, header=[0, 1, 2], index_col=0) + df = _ensure_individuals_in_header(df, individual_name) # Note the video here is a tuple of the video path and the image shape if config_file is not None: @@ -404,7 +389,7 @@ def add_subject_to_nwbfile( # Fetch the corresponding metadata pickle file, we extract the edges graph from here # TODO: This is the original implementation way to extract the file name but looks very brittle. Improve it - filename = str(h5file.parent / h5file.stem) + filename = str(file_path.parent / file_path.stem) for i, c in enumerate(filename[::-1]): if c.isnumeric(): break diff --git a/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py b/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py index 21b054e85..f45913061 100644 --- a/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py +++ b/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py @@ -5,6 +5,7 @@ from pydantic import FilePath, validate_call from pynwb.file import NWBFile +# import ndx_pose from ....basetemporalalignmentinterface import BaseTemporalAlignmentInterface @@ -13,7 +14,7 @@ class DeepLabCutInterface(BaseTemporalAlignmentInterface): display_name = "DeepLabCut" keywords = ("DLC",) - associated_suffixes = (".h5",) + associated_suffixes = (".h5", ".csv") info = "Interface for handling data from DeepLabCut." _timestamps = None @@ -21,8 +22,8 @@ class DeepLabCutInterface(BaseTemporalAlignmentInterface): @classmethod def get_source_schema(cls) -> dict: source_schema = super().get_source_schema() - source_schema["properties"]["file_path"]["description"] = "Path to the .h5 file output by dlc." - source_schema["properties"]["config_file_path"]["description"] = "Path to .yml config file" + source_schema["properties"]["file_path"]["description"] = "Path to the file output by dlc (.h5 or .csv)." + source_schema["properties"]["config_file_path"]["description"] = "Path to .yml config file." return source_schema @validate_call @@ -34,24 +35,25 @@ def __init__( verbose: bool = True, ): """ - Interface for writing DLC's h5 files to nwb using dlc2nwb. + Interface for writing DLC's output files to nwb using dlc2nwb. Parameters ---------- file_path : FilePath - path to the h5 file output by dlc. 
+ Path to the file output by dlc (.h5 or .csv). config_file_path : FilePath, optional - path to .yml config file + Path to .yml config file subject_name : str, default: "ind1" - the name of the subject for which the :py:class:`~pynwb.file.NWBFile` is to be created. + The name of the subject for which the :py:class:`~pynwb.file.NWBFile` is to be created. verbose: bool, default: True - controls verbosity. + Controls verbosity. """ from ._dlc_utils import _read_config file_path = Path(file_path) - if "DLC" not in file_path.stem or ".h5" not in file_path.suffixes: - raise IOError("The file passed in is not a DeepLabCut h5 data file.") + suffix_is_valid = ".h5" in file_path.suffixes or ".csv" in file_path.suffixes + if not "DLC" in file_path.stem or not suffix_is_valid: + raise IOError("The file passed in is not a valid DeepLabCut output data file.") self.config_dict = dict() if config_file_path is not None: @@ -108,12 +110,14 @@ def add_to_nwbfile( nwb file to which the recording information is to be added metadata: dict metadata info for constructing the nwb file (optional). + container_name: str, default: "PoseEstimation" + Name of the container to store the pose estimation. """ - from ._dlc_utils import add_subject_to_nwbfile + from ._dlc_utils import _add_subject_to_nwbfile - add_subject_to_nwbfile( + _add_subject_to_nwbfile( nwbfile=nwbfile, - h5file=str(self.source_data["file_path"]), + file_path=str(self.source_data["file_path"]), individual_name=self.subject_name, config_file=self.source_data["config_file_path"], timestamps=self._timestamps, diff --git a/src/neuroconv/tools/testing/data_interface_mixins.py b/src/neuroconv/tools/testing/data_interface_mixins.py index 946b3fd6c..5187ff2e4 100644 --- a/src/neuroconv/tools/testing/data_interface_mixins.py +++ b/src/neuroconv/tools/testing/data_interface_mixins.py @@ -743,30 +743,6 @@ def test_interface_alignment(self): pass -class DeepLabCutInterfaceMixin(DataInterfaceTestMixin, TemporalAlignmentMixin): - """ - A mixin for testing DeepLabCut interfaces. - """ - - def check_interface_get_original_timestamps(self): - pass # TODO in separate PR - - def check_interface_get_timestamps(self): - pass # TODO in separate PR - - def check_interface_set_aligned_timestamps(self): - pass # TODO in separate PR - - def check_shift_timestamps_by_start_time(self): - pass # TODO in separate PR - - def check_interface_original_timestamps_inmutability(self): - pass # TODO in separate PR - - def check_nwbfile_temporal_alignment(self): - pass # TODO in separate PR - - class VideoInterfaceMixin(DataInterfaceTestMixin, TemporalAlignmentMixin): """ A mixin for testing Video interfaces. 
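For orientation, a minimal usage sketch of the new .csv support introduced by this patch, mirroring the .h5 example in the updated documentation above. The file path, subject name, and output file name below are placeholders rather than files shipped with the test data; a DeepLabCut-style stem containing "DLC" is assumed because the interface parses the scorer name from it.

.. code-block:: python

    # Illustrative sketch only; all paths and names are hypothetical.
    from datetime import datetime
    from pathlib import Path

    from neuroconv.datainterfaces import DeepLabCutInterface

    file_path = Path("session1/videoDLC_resnet50_projectNov11shuffle1_100000.csv")
    config_file_path = None  # a project config.yaml may also be passed, as with .h5 input

    interface = DeepLabCutInterface(
        file_path=file_path,
        config_file_path=config_file_path,
        subject_name="subject1",
        verbose=False,
    )

    metadata = interface.get_metadata()
    metadata["NWBFile"]["session_start_time"] = datetime(2024, 11, 15, 10, 30).astimezone()
    interface.run_conversion(nwbfile_path="dlc_from_csv.nwb", metadata=metadata, overwrite=True)

As with .h5 input, ``config_file_path`` remains optional; the new tests exercise both the with-config and without-config paths for .csv output.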
diff --git a/tests/test_on_data/behavior/test_behavior_interfaces.py b/tests/test_on_data/behavior/test_behavior_interfaces.py index 8e3e01d61..b43e65206 100644 --- a/tests/test_on_data/behavior/test_behavior_interfaces.py +++ b/tests/test_on_data/behavior/test_behavior_interfaces.py @@ -29,7 +29,6 @@ ) from neuroconv.tools.testing.data_interface_mixins import ( DataInterfaceTestMixin, - DeepLabCutInterfaceMixin, MedPCInterfaceMixin, TemporalAlignmentMixin, VideoInterfaceMixin, @@ -332,11 +331,16 @@ class TestFicTracDataInterfaceTiming(TemporalAlignmentMixin): platform == "darwin" and python_version < version.parse("3.10"), reason="interface not supported on macOS with Python < 3.10", ) -class TestDeepLabCutInterface(DeepLabCutInterfaceMixin): +class TestDeepLabCutInterface(DataInterfaceTestMixin): data_interface_cls = DeepLabCutInterface interface_kwargs = dict( - file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"), - config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "config.yaml"), + file_path=str( + BEHAVIOR_DATA_PATH + / "DLC" + / "open_field_without_video" + / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5" + ), + config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml"), subject_name="ind1", ) save_directory = OUTPUT_PATH @@ -384,7 +388,12 @@ def check_read_nwb(self, nwbfile_path: str): class TestDeepLabCutInterfaceNoConfigFile(DataInterfaceTestMixin): data_interface_cls = DeepLabCutInterface interface_kwargs = dict( - file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"), + file_path=str( + BEHAVIOR_DATA_PATH + / "DLC" + / "open_field_without_video" + / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5" + ), config_file_path=None, subject_name="ind1", ) @@ -411,11 +420,16 @@ def check_read_nwb(self, nwbfile_path: str): platform == "darwin" and python_version < version.parse("3.10"), reason="interface not supported on macOS with Python < 3.10", ) -class TestDeepLabCutInterfaceSetTimestamps(DeepLabCutInterfaceMixin): +class TestDeepLabCutInterfaceSetTimestamps(DataInterfaceTestMixin): data_interface_cls = DeepLabCutInterface interface_kwargs = dict( - file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"), - config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "config.yaml"), + file_path=str( + BEHAVIOR_DATA_PATH + / "DLC" + / "open_field_without_video" + / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5" + ), + config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml"), subject_name="ind1", ) @@ -454,6 +468,41 @@ def check_read_nwb(self, nwbfile_path: str): pass +@pytest.mark.skipif( + platform == "darwin" and python_version < version.parse("3.10"), + reason="interface not supported on macOS with Python < 3.10", +) +class TestDeepLabCutInterfaceFromCSV(DataInterfaceTestMixin): + data_interface_cls = DeepLabCutInterface + interface_kwargs = dict( + file_path=str( + BEHAVIOR_DATA_PATH + / "DLC" + / "SL18_csv" + / "SL18_D19_S01_F01_BOX_SLP_20230503_112642.1DLC_resnet50_SubLearnSleepBoxRedLightJun26shuffle1_100000_stubbed.csv" + ), + config_file_path=None, + subject_name="SL18", + ) + save_directory = OUTPUT_PATH + + def check_read_nwb(self, nwbfile_path: str): + with NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io: + nwbfile = io.read() + assert "behavior" in nwbfile.processing + processing_module_interfaces = nwbfile.processing["behavior"].data_interfaces 
+ assert "PoseEstimation" in processing_module_interfaces + + pose_estimation_series_in_nwb = processing_module_interfaces["PoseEstimation"].pose_estimation_series + expected_pose_estimation_series = ["SL18_redled", "SL18_shoulder", "SL18_haunch", "SL18_baseoftail"] + + expected_pose_estimation_series_are_in_nwb_file = [ + pose_estimation in pose_estimation_series_in_nwb for pose_estimation in expected_pose_estimation_series + ] + + assert all(expected_pose_estimation_series_are_in_nwb_file) + + class TestSLEAPInterface(DataInterfaceTestMixin, TemporalAlignmentMixin): data_interface_cls = SLEAPInterface interface_kwargs = dict( From 64fb9e01a5f4070fd3b01ebab50d8fc19a2fe953 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 14 Nov 2024 21:33:52 -0600 Subject: [PATCH 18/28] Use mixing tests for mocks (#1136) --- CHANGELOG.md | 1 + .../tools/testing/data_interface_mixins.py | 1 - tests/test_ecephys/test_ecephys_interfaces.py | 114 +++++++----------- .../test_mock_recording_interface.py | 9 -- 4 files changed, 43 insertions(+), 82 deletions(-) delete mode 100644 tests/test_ecephys/test_mock_recording_interface.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 0545001d1..92f4e6b5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ * Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140) ## Improvements +* Use mixing tests for ecephy's mocks [PR #1136](https://github.com/catalystneuro/neuroconv/pull/1136) # v0.6.5 (November 1, 2024) diff --git a/src/neuroconv/tools/testing/data_interface_mixins.py b/src/neuroconv/tools/testing/data_interface_mixins.py index 5187ff2e4..fab049165 100644 --- a/src/neuroconv/tools/testing/data_interface_mixins.py +++ b/src/neuroconv/tools/testing/data_interface_mixins.py @@ -92,7 +92,6 @@ def test_metadata_schema_valid(self, setup_interface): Draft7Validator.check_schema(schema=schema) def test_metadata(self, setup_interface): - # Validate metadata now happens on the class itself metadata = self.interface.get_metadata() self.check_extracted_metadata(metadata) diff --git a/tests/test_ecephys/test_ecephys_interfaces.py b/tests/test_ecephys/test_ecephys_interfaces.py index 4d4232bf2..24393923f 100644 --- a/tests/test_ecephys/test_ecephys_interfaces.py +++ b/tests/test_ecephys/test_ecephys_interfaces.py @@ -27,42 +27,61 @@ python_version = Version(get_python_version()) +from neuroconv.tools.testing.data_interface_mixins import ( + RecordingExtractorInterfaceTestMixin, + SortingExtractorInterfaceTestMixin, +) -class TestRecordingInterface(TestCase): - @classmethod - def setUpClass(cls): - cls.single_segment_recording_interface = MockRecordingInterface(durations=[0.100]) - cls.multi_segment_recording_interface = MockRecordingInterface(durations=[0.100, 0.100]) - def test_stub_single_segment(self): - interface = self.single_segment_recording_interface +class TestSortingInterface(SortingExtractorInterfaceTestMixin): + + data_interface_cls = MockSortingInterface + interface_kwargs = dict(num_units=4, durations=[0.100]) + + def test_propagate_conversion_options(self, setup_interface): + interface = self.interface metadata = interface.get_metadata() - interface.create_nwbfile(stub_test=True, metadata=metadata) + nwbfile = interface.create_nwbfile( + stub_test=True, + metadata=metadata, + write_as="processing", + units_name="processed_units", + units_description="The processed units.", + ) - def test_stub_multi_segment(self): - interface = self.multi_segment_recording_interface + ecephys = 
get_module(nwbfile, "ecephys") + + assert nwbfile.units is None + assert "processed_units" in ecephys.data_interfaces + + +class TestRecordingInterface(RecordingExtractorInterfaceTestMixin): + data_interface_cls = MockRecordingInterface + interface_kwargs = dict(durations=[0.100]) + + def test_stub(self, setup_interface): + interface = self.interface metadata = interface.get_metadata() interface.create_nwbfile(stub_test=True, metadata=metadata) - def test_no_slash_in_name(self): - interface = self.single_segment_recording_interface + def test_no_slash_in_name(self, setup_interface): + interface = self.interface metadata = interface.get_metadata() metadata["Ecephys"]["ElectricalSeries"]["name"] = "test/slash" - with self.assertRaises(jsonschema.exceptions.ValidationError): + with pytest.raises(jsonschema.exceptions.ValidationError): interface.validate_metadata(metadata) + def test_stub_multi_segment(self): -class TestAlwaysWriteTimestamps: + interface = MockRecordingInterface(durations=[0.100, 0.100]) + metadata = interface.get_metadata() + interface.create_nwbfile(stub_test=True, metadata=metadata) - def test_always_write_timestamps(self): - # By default the MockRecordingInterface has a uniform sampling rate - interface = MockRecordingInterface(durations=[1.0], sampling_frequency=30_000.0) + def test_always_write_timestamps(self, setup_interface): - nwbfile = interface.create_nwbfile(always_write_timestamps=True) + nwbfile = self.interface.create_nwbfile(always_write_timestamps=True) electrical_series = nwbfile.acquisition["ElectricalSeries"] - - expected_timestamps = interface.recording_extractor.get_times() - + expected_timestamps = self.interface.recording_extractor.get_times() np.testing.assert_array_equal(electrical_series.timestamps[:], expected_timestamps) @@ -84,33 +103,9 @@ def test_spike2_import_assertions_3_11(self): Spike2RecordingInterface.get_all_channels_info(file_path="does_not_matter.smrx") -class TestSortingInterface: - - def test_run_conversion(self, tmp_path): - - nwbfile_path = Path(tmp_path) / "test_sorting.nwb" - num_units = 4 - interface = MockSortingInterface(num_units=num_units, durations=(1.0,)) - interface.sorting_extractor = interface.sorting_extractor.rename_units(new_unit_ids=["a", "b", "c", "d"]) - - interface.run_conversion(nwbfile_path=nwbfile_path) - with NWBHDF5IO(nwbfile_path, "r") as io: - nwbfile = io.read() - - units = nwbfile.units - assert len(units) == num_units - units_df = units.to_dataframe() - # Get index in units table - for unit_id in interface.sorting_extractor.unit_ids: - # In pynwb we write unit name as unit_id - row = units_df.query(f"unit_name == '{unit_id}'") - spike_times = interface.sorting_extractor.get_unit_spike_train(unit_id=unit_id, return_times=True) - written_spike_times = row["spike_times"].iloc[0] - - np.testing.assert_array_equal(spike_times, written_spike_times) - - class TestSortingInterfaceOld(unittest.TestCase): + """Old-style tests for the SortingInterface. 
Remove once we we are sure all the behaviors are covered by the mock.""" + @classmethod def setUpClass(cls) -> None: cls.test_dir = Path(mkdtemp()) @@ -194,28 +189,3 @@ def test_sorting_full(self): nwbfile = io.read() for i, start_times in enumerate(self.sorting_start_frames): assert len(nwbfile.units["spike_times"][i]) == self.num_frames - start_times - - def test_sorting_propagate_conversion_options(self): - minimal_nwbfile = self.test_dir / "temp2.nwb" - metadata = self.test_sorting_interface.get_metadata() - metadata["NWBFile"]["session_start_time"] = datetime.now().astimezone() - units_description = "The processed units." - conversion_options = dict( - TestSortingInterface=dict( - write_as="processing", - units_name="processed_units", - units_description=units_description, - ) - ) - self.test_sorting_interface.run_conversion( - nwbfile_path=minimal_nwbfile, - metadata=metadata, - conversion_options=conversion_options, - ) - - with NWBHDF5IO(minimal_nwbfile, "r") as io: - nwbfile = io.read() - ecephys = get_module(nwbfile, "ecephys") - self.assertIsNone(nwbfile.units) - self.assertIn("processed_units", ecephys.data_interfaces) - self.assertEqual(ecephys["processed_units"].description, units_description) diff --git a/tests/test_ecephys/test_mock_recording_interface.py b/tests/test_ecephys/test_mock_recording_interface.py deleted file mode 100644 index a33f3acd1..000000000 --- a/tests/test_ecephys/test_mock_recording_interface.py +++ /dev/null @@ -1,9 +0,0 @@ -from neuroconv.tools.testing.data_interface_mixins import ( - RecordingExtractorInterfaceTestMixin, -) -from neuroconv.tools.testing.mock_interfaces import MockRecordingInterface - - -class TestMockRecordingInterface(RecordingExtractorInterfaceTestMixin): - data_interface_cls = MockRecordingInterface - interface_kwargs = dict(durations=[0.100]) From a608e904ad6fd75f7f983c1555bea1832f850f8e Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Fri, 15 Nov 2024 12:24:14 -0600 Subject: [PATCH 19/28] Propagate `unit_electrode_indices` to `SortingInterface` (#1124) --- CHANGELOG.md | 1 + .../ecephys/basesortingextractorinterface.py | 8 +++++ .../tools/spikeinterface/spikeinterface.py | 19 +++++++--- tests/test_ecephys/test_ecephys_interfaces.py | 35 +++++++++++++++++++ 4 files changed, 58 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 92f4e6b5f..002aed660 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ * datetime objects now can be validated as conversion options [#1139](https://github.com/catalystneuro/neuroconv/pull/1126) ## Features +* Propagate the `unit_electrode_indices` argument from the spikeinterface tools to `BaseSortingExtractorInterface`. 
This allows users to map units to the electrode table when adding sorting data [PR #1124](https://github.com/catalystneuro/neuroconv/pull/1124) * Imaging interfaces have a new conversion option `always_write_timestamps` that can be used to force writing timestamps even if neuroconv's heuristics indicates regular sampling rate [PR #1125](https://github.com/catalystneuro/neuroconv/pull/1125) * Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140) diff --git a/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py b/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py index cd8396154..dca2dea5f 100644 --- a/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py +++ b/src/neuroconv/datainterfaces/ecephys/basesortingextractorinterface.py @@ -288,6 +288,7 @@ def add_to_nwbfile( write_as: Literal["units", "processing"] = "units", units_name: str = "units", units_description: str = "Autogenerated by neuroconv.", + unit_electrode_indices: Optional[list[list[int]]] = None, ): """ Primary function for converting the data in a SortingExtractor to NWB format. @@ -312,9 +313,15 @@ def add_to_nwbfile( units_name : str, default: 'units' The name of the units table. If write_as=='units', then units_name must also be 'units'. units_description : str, default: 'Autogenerated by neuroconv.' + unit_electrode_indices : list of lists of int, optional + A list of lists of integers indicating the indices of the electrodes that each unit is associated with. + The length of the list must match the number of units in the sorting extractor. """ from ...tools.spikeinterface import add_sorting_to_nwbfile + if metadata is None: + metadata = self.get_metadata() + metadata_copy = deepcopy(metadata) if write_ecephys_metadata: self.add_channel_metadata_to_nwb(nwbfile=nwbfile, metadata=metadata_copy) @@ -346,4 +353,5 @@ def add_to_nwbfile( write_as=write_as, units_name=units_name, units_description=units_description, + unit_electrode_indices=unit_electrode_indices, ) diff --git a/src/neuroconv/tools/spikeinterface/spikeinterface.py b/src/neuroconv/tools/spikeinterface/spikeinterface.py index 5aa3c8925..fa00d58ed 100644 --- a/src/neuroconv/tools/spikeinterface/spikeinterface.py +++ b/src/neuroconv/tools/spikeinterface/spikeinterface.py @@ -1368,7 +1368,7 @@ def add_units_table_to_nwbfile( write_in_processing_module: bool = False, waveform_means: Optional[np.ndarray] = None, waveform_sds: Optional[np.ndarray] = None, - unit_electrode_indices=None, + unit_electrode_indices: Optional[list[list[int]]] = None, null_values_for_properties: Optional[dict] = None, ): """ @@ -1405,8 +1405,9 @@ def add_units_table_to_nwbfile( Waveform standard deviation for each unit. Shape: (num_units, num_samples, num_channels). unit_electrode_indices : list of lists of int, optional For each unit, a list of electrode indices corresponding to waveform data. - null_values_for_properties: dict, optional - A dictionary mapping properties to null values to use when the property is not present + unit_electrode_indices : list of lists of int, optional + A list of lists of integers indicating the indices of the electrodes that each unit is associated with. + The length of the list must match the number of units in the sorting extractor. """ unit_table_description = unit_table_description or "Autogenerated by neuroconv." 
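To make the propagated ``unit_electrode_indices`` argument concrete, a short sketch using the mock interfaces from the testing tools, following the same pattern as the new tests added below; the index lists here are arbitrary placeholders, and any recording/sorting interface pair would work the same way.

.. code-block:: python

    # Sketch with the testing mocks; real BaseRecording/BaseSortingExtractorInterface
    # subclasses accept the same argument.
    from neuroconv.tools.testing.mock_interfaces import (
        MockRecordingInterface,
        MockSortingInterface,
    )

    recording_interface = MockRecordingInterface(num_channels=4, durations=[0.100])
    sorting_interface = MockSortingInterface(num_units=4, durations=[0.100])

    # The electrodes table must already exist before units can reference it.
    nwbfile = recording_interface.create_nwbfile()

    # One list of electrode-table row indices per unit, in unit order.
    unit_electrode_indices = [[0], [1], [2], [3]]
    sorting_interface.add_to_nwbfile(nwbfile=nwbfile, unit_electrode_indices=unit_electrode_indices)

Because the units table references rows of the electrodes table, an electrodes table must be added to the NWBFile first; otherwise the mapping fails with the ``ValueError`` introduced in this patch.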
@@ -1414,6 +1415,13 @@ def add_units_table_to_nwbfile( nwbfile, pynwb.NWBFile ), f"'nwbfile' should be of type pynwb.NWBFile but is of type {type(nwbfile)}" + if unit_electrode_indices is not None: + electrodes_table = nwbfile.electrodes + if electrodes_table is None: + raise ValueError( + "Electrodes table is required to map units to electrodes. Add an electrode table to the NWBFile first." + ) + null_values_for_properties = dict() if null_values_for_properties is None else null_values_for_properties if not write_in_processing_module and units_table_name != "units": @@ -1668,7 +1676,7 @@ def add_sorting_to_nwbfile( units_description: str = "Autogenerated by neuroconv.", waveform_means: Optional[np.ndarray] = None, waveform_sds: Optional[np.ndarray] = None, - unit_electrode_indices=None, + unit_electrode_indices: Optional[list[list[int]]] = None, ): """Add sorting data (units and their properties) to an NWBFile. @@ -1703,7 +1711,8 @@ def add_sorting_to_nwbfile( waveform_sds : np.ndarray, optional Waveform standard deviation for each unit. Shape: (num_units, num_samples, num_channels). unit_electrode_indices : list of lists of int, optional - For each unit, a list of electrode indices corresponding to waveform data. + A list of lists of integers indicating the indices of the electrodes that each unit is associated with. + The length of the list must match the number of units in the sorting extractor. """ if skip_features is not None: diff --git a/tests/test_ecephys/test_ecephys_interfaces.py b/tests/test_ecephys/test_ecephys_interfaces.py index 24393923f..e036ccb81 100644 --- a/tests/test_ecephys/test_ecephys_interfaces.py +++ b/tests/test_ecephys/test_ecephys_interfaces.py @@ -54,6 +54,41 @@ def test_propagate_conversion_options(self, setup_interface): assert nwbfile.units is None assert "processed_units" in ecephys.data_interfaces + def test_electrode_indices(self, setup_interface): + + recording_interface = MockRecordingInterface(num_channels=4, durations=[0.100]) + recording_extractor = recording_interface.recording_extractor + recording_extractor = recording_extractor.rename_channels(new_channel_ids=["a", "b", "c", "d"]) + recording_extractor.set_property(key="property", values=["A", "B", "C", "D"]) + recording_interface.recording_extractor = recording_extractor + + nwbfile = recording_interface.create_nwbfile() + + unit_electrode_indices = [[3], [0, 1], [1], [2]] + expected_properties_matching = [["D"], ["A", "B"], ["B"], ["C"]] + self.interface.add_to_nwbfile(nwbfile=nwbfile, unit_electrode_indices=unit_electrode_indices) + + unit_table = nwbfile.units + + for unit_row, electrode_indices, property in zip( + unit_table.to_dataframe().itertuples(index=False), + unit_electrode_indices, + expected_properties_matching, + ): + electrode_table_region = unit_row.electrodes + electrode_table_region_indices = electrode_table_region.index.to_list() + assert electrode_table_region_indices == electrode_indices + + electrode_table_region_properties = electrode_table_region["property"].to_list() + assert electrode_table_region_properties == property + + def test_electrode_indices_assertion_error_when_missing_table(self, setup_interface): + with pytest.raises( + ValueError, + match="Electrodes table is required to map units to electrodes. 
Add an electrode table to the NWBFile first.", + ): + self.interface.create_nwbfile(unit_electrode_indices=[[0], [1], [2], [3]]) + class TestRecordingInterface(RecordingExtractorInterfaceTestMixin): data_interface_cls = MockRecordingInterface From dcd248b3faec7f9b6f003784dbd899945b565df8 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Fri, 22 Nov 2024 12:55:53 -0600 Subject: [PATCH 20/28] Move imports of extensions to the init of the interfaces (#1144) Co-authored-by: Paul Adkisson --- CHANGELOG.md | 3 +- .../behavior/audio/audiointerface.py | 6 ++- .../behavior/deeplabcut/_dlc_utils.py | 4 +- .../deeplabcut/deeplabcutdatainterface.py | 4 +- .../lightningposedatainterface.py | 4 ++ .../behavior/medpc/medpcdatainterface.py | 4 ++ .../tdt_fp/tdtfiberphotometrydatainterface.py | 1 + src/neuroconv/tools/audio/audio.py | 12 ----- .../behavior/test_behavior_interfaces.py | 47 ++++++++++++++++++- .../behavior/test_lightningpose_converter.py | 3 +- 10 files changed, 69 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 002aed660..c0c3bd729 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,11 @@ # Upcoming ## Deprecations -* Completely removed compression settings from most places[PR #1126](https://github.com/catalystneuro/neuroconv/pull/1126) +* Completely removed compression settings from most places [PR #1126](https://github.com/catalystneuro/neuroconv/pull/1126) ## Bug Fixes * datetime objects now can be validated as conversion options [#1139](https://github.com/catalystneuro/neuroconv/pull/1126) +* Fix a bug where data in `DeepLabCutInterface` failed to write when `ndx-pose` was not imported. [#1144](https://github.com/catalystneuro/neuroconv/pull/1144) ## Features * Propagate the `unit_electrode_indices` argument from the spikeinterface tools to `BaseSortingExtractorInterface`. This allows users to map units to the electrode table when adding sorting data [PR #1124](https://github.com/catalystneuro/neuroconv/pull/1124) diff --git a/src/neuroconv/datainterfaces/behavior/audio/audiointerface.py b/src/neuroconv/datainterfaces/behavior/audio/audiointerface.py index fc3f08fb8..6561096b5 100644 --- a/src/neuroconv/datainterfaces/behavior/audio/audiointerface.py +++ b/src/neuroconv/datainterfaces/behavior/audio/audiointerface.py @@ -46,6 +46,10 @@ def __init__(self, file_paths: list[FilePath], verbose: bool = False): verbose : bool, default: False """ + # This import is to assure that ndx_sound is in the global namespace when an pynwb.io object is created. 
+ # For more detail, see https://github.com/rly/ndx-pose/issues/36 + import ndx_sound # noqa: F401 + suffixes = [suffix for file_path in file_paths for suffix in Path(file_path).suffixes] format_is_not_supported = [ suffix for suffix in suffixes if suffix not in [".wav"] @@ -166,7 +170,6 @@ def add_to_nwbfile( stub_frames: int = 1000, write_as: Literal["stimulus", "acquisition"] = "stimulus", iterator_options: Optional[dict] = None, - compression_options: Optional[dict] = None, # TODO: remove completely after 10/1/2024 overwrite: bool = False, verbose: bool = True, ): @@ -224,7 +227,6 @@ def add_to_nwbfile( write_as=write_as, starting_time=starting_times[file_index], iterator_options=iterator_options, - compression_options=compression_options, # TODO: remove completely after 10/1/2024; still passing for deprecation warning ) return nwbfile diff --git a/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py b/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py index 5d1224e85..14866510d 100644 --- a/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py +++ b/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py @@ -279,13 +279,14 @@ def _write_pes_to_nwbfile( else: timestamps_cleaned = timestamps + timestamps = np.asarray(timestamps_cleaned).astype("float64", copy=False) pes = PoseEstimationSeries( name=f"{animal}_{keypoint}" if animal else keypoint, description=f"Keypoint {keypoint} from individual {animal}.", data=data[:, :2], unit="pixels", reference_frame="(0,0) corresponds to the bottom left corner of the video.", - timestamps=timestamps_cleaned, + timestamps=timestamps, confidence=data[:, 2], confidence_definition="Softmax output of the deep neural network.", ) @@ -298,6 +299,7 @@ def _write_pes_to_nwbfile( # TODO, taken from the original implementation, improve it if the video is passed dimensions = [list(map(int, image_shape.split(",")))[1::2]] + dimensions = np.array(dimensions, dtype="uint32") pose_estimation_default_kwargs = dict( pose_estimation_series=pose_estimation_series, description="2D keypoint coordinates estimated using DeepLabCut.", diff --git a/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py b/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py index f45913061..147fcf6ea 100644 --- a/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py +++ b/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py @@ -5,7 +5,6 @@ from pydantic import FilePath, validate_call from pynwb.file import NWBFile -# import ndx_pose from ....basetemporalalignmentinterface import BaseTemporalAlignmentInterface @@ -48,6 +47,9 @@ def __init__( verbose: bool, default: True Controls verbosity. """ + # This import is to assure that the ndx_pose is in the global namespace when an pynwb.io object is created + from ndx_pose import PoseEstimation, PoseEstimationSeries # noqa: F401 + from ._dlc_utils import _read_config file_path = Path(file_path) diff --git a/src/neuroconv/datainterfaces/behavior/lightningpose/lightningposedatainterface.py b/src/neuroconv/datainterfaces/behavior/lightningpose/lightningposedatainterface.py index f103b7c9a..dbd425b5b 100644 --- a/src/neuroconv/datainterfaces/behavior/lightningpose/lightningposedatainterface.py +++ b/src/neuroconv/datainterfaces/behavior/lightningpose/lightningposedatainterface.py @@ -80,6 +80,10 @@ def __init__( verbose : bool, default: True controls verbosity. ``True`` by default. 
""" + # This import is to assure that the ndx_pose is in the global namespace when an pynwb.io object is created + # For more detail, see https://github.com/rly/ndx-pose/issues/36 + import ndx_pose # noqa: F401 + from neuroconv.datainterfaces.behavior.video.video_utils import ( VideoCaptureContext, ) diff --git a/src/neuroconv/datainterfaces/behavior/medpc/medpcdatainterface.py b/src/neuroconv/datainterfaces/behavior/medpc/medpcdatainterface.py index 6a4127663..09f9111d7 100644 --- a/src/neuroconv/datainterfaces/behavior/medpc/medpcdatainterface.py +++ b/src/neuroconv/datainterfaces/behavior/medpc/medpcdatainterface.py @@ -73,6 +73,10 @@ def __init__( verbose : bool, optional Whether to print verbose output, by default True """ + # This import is to assure that the ndx_events is in the global namespace when an pynwb.io object is created + # For more detail, see https://github.com/rly/ndx-pose/issues/36 + import ndx_events # noqa: F401 + if aligned_timestamp_names is None: aligned_timestamp_names = [] super().__init__( diff --git a/src/neuroconv/datainterfaces/ophys/tdt_fp/tdtfiberphotometrydatainterface.py b/src/neuroconv/datainterfaces/ophys/tdt_fp/tdtfiberphotometrydatainterface.py index aa58f6ae4..8b092464e 100644 --- a/src/neuroconv/datainterfaces/ophys/tdt_fp/tdtfiberphotometrydatainterface.py +++ b/src/neuroconv/datainterfaces/ophys/tdt_fp/tdtfiberphotometrydatainterface.py @@ -43,6 +43,7 @@ def __init__(self, folder_path: DirectoryPath, verbose: bool = True): folder_path=folder_path, verbose=verbose, ) + # This module should be here so ndx_fiber_photometry is in the global namespace when an pynwb.io object is created import ndx_fiber_photometry # noqa: F401 def get_metadata(self) -> DeepDict: diff --git a/src/neuroconv/tools/audio/audio.py b/src/neuroconv/tools/audio/audio.py index 44d10de63..064f36155 100644 --- a/src/neuroconv/tools/audio/audio.py +++ b/src/neuroconv/tools/audio/audio.py @@ -15,7 +15,6 @@ def add_acoustic_waveform_series( starting_time: float = 0.0, write_as: Literal["stimulus", "acquisition"] = "stimulus", iterator_options: Optional[dict] = None, - compression_options: Optional[dict] = None, # TODO: remove completely after 10/1/2024 ) -> NWBFile: """ @@ -53,17 +52,6 @@ def add_acoustic_waveform_series( "acquisition", ], "Acoustic series can be written either as 'stimulus' or 'acquisition'." - # TODO: remove completely after 10/1/2024 - if compression_options is not None: - warn( - message=( - "Specifying compression methods and their options at the level of tool functions has been deprecated. " - "Please use the `configure_backend` tool function for this purpose." 
- ), - category=DeprecationWarning, - stacklevel=2, - ) - iterator_options = iterator_options or dict() container = nwbfile.acquisition if write_as == "acquisition" else nwbfile.stimulus diff --git a/tests/test_on_data/behavior/test_behavior_interfaces.py b/tests/test_on_data/behavior/test_behavior_interfaces.py index b43e65206..0b5c63376 100644 --- a/tests/test_on_data/behavior/test_behavior_interfaces.py +++ b/tests/test_on_data/behavior/test_behavior_interfaces.py @@ -1,3 +1,4 @@ +import sys import unittest from datetime import datetime, timezone from pathlib import Path @@ -10,7 +11,6 @@ from natsort import natsorted from ndx_miniscope import Miniscope from ndx_miniscope.utils import get_timestamps -from ndx_pose import PoseEstimation, PoseEstimationSeries from numpy.testing import assert_array_equal from parameterized import param, parameterized from pynwb import NWBHDF5IO @@ -105,6 +105,8 @@ def check_extracted_metadata(self, metadata: dict): assert metadata["Behavior"][self.pose_estimation_name] == self.expected_metadata[self.pose_estimation_name] def check_read_nwb(self, nwbfile_path: str): + from ndx_pose import PoseEstimation, PoseEstimationSeries + with NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io: nwbfile = io.read() @@ -381,6 +383,49 @@ def check_read_nwb(self, nwbfile_path: str): assert all(expected_pose_estimation_series_are_in_nwb_file) +@pytest.fixture +def clean_pose_extension_import(): + modules_to_remove = [m for m in sys.modules if m.startswith("ndx_pose")] + for module in modules_to_remove: + del sys.modules[module] + + +@pytest.mark.skipif( + platform == "darwin" and python_version < version.parse("3.10"), + reason="interface not supported on macOS with Python < 3.10", +) +def test_deep_lab_cut_import_pose_extension_bug(clean_pose_extension_import, tmp_path): + """ + Test that the DeepLabCutInterface writes correctly without importing the ndx-pose extension. 
+ See issues: + https://github.com/catalystneuro/neuroconv/issues/1114 + https://github.com/rly/ndx-pose/issues/36 + + """ + + interface_kwargs = dict( + file_path=str( + BEHAVIOR_DATA_PATH + / "DLC" + / "open_field_without_video" + / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5" + ), + config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml"), + ) + + interface = DeepLabCutInterface(**interface_kwargs) + metadata = interface.get_metadata() + metadata["NWBFile"]["session_start_time"] = datetime(2023, 7, 24, 9, 30, 55, 440600, tzinfo=timezone.utc) + + nwbfile_path = tmp_path / "test.nwb" + interface.run_conversion(nwbfile_path=nwbfile_path, metadata=metadata, overwrite=True) + with NWBHDF5IO(path=nwbfile_path, mode="r") as io: + read_nwbfile = io.read() + pose_estimation_container = read_nwbfile.processing["behavior"]["PoseEstimation"] + + assert len(pose_estimation_container.fields) > 0 + + @pytest.mark.skipif( platform == "darwin" and python_version < version.parse("3.10"), reason="interface not supported on macOS with Python < 3.10", diff --git a/tests/test_on_data/behavior/test_lightningpose_converter.py b/tests/test_on_data/behavior/test_lightningpose_converter.py index 4d0f8ab89..dd93632a4 100644 --- a/tests/test_on_data/behavior/test_lightningpose_converter.py +++ b/tests/test_on_data/behavior/test_lightningpose_converter.py @@ -5,7 +5,6 @@ from warnings import warn from hdmf.testing import TestCase -from ndx_pose import PoseEstimation from pynwb import NWBHDF5IO from pynwb.image import ImageSeries @@ -134,6 +133,8 @@ def test_run_conversion_add_conversion_options(self): self.assertNWBFileStructure(nwbfile_path=nwbfile_path, **self.conversion_options) def assertNWBFileStructure(self, nwbfile_path: str, stub_test: bool = False): + from ndx_pose import PoseEstimation + with NWBHDF5IO(path=nwbfile_path) as io: nwbfile = io.read() From 006184ae120f4c4dd73349b354b4717345f7507d Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Tue, 26 Nov 2024 18:44:53 -0500 Subject: [PATCH 21/28] [Cloud Deployment IVd] AWS usage docs (#1104) Co-authored-by: Heberto Mayorquin --- docs/api/tools.aws.rst | 5 ++ docs/api/tools.rst | 1 + docs/user_guide/aws_demo.rst | 136 +++++++++++++++++++++++++++++++++++ docs/user_guide/index.rst | 1 + 4 files changed, 143 insertions(+) create mode 100644 docs/api/tools.aws.rst create mode 100644 docs/user_guide/aws_demo.rst diff --git a/docs/api/tools.aws.rst b/docs/api/tools.aws.rst new file mode 100644 index 000000000..6f3ed0f86 --- /dev/null +++ b/docs/api/tools.aws.rst @@ -0,0 +1,5 @@ +.. _api_docs_aws_tools: + +AWS Tools +--------- +.. automodule:: neuroconv.tools.aws diff --git a/docs/api/tools.rst b/docs/api/tools.rst index 41515facd..793e0dbeb 100644 --- a/docs/api/tools.rst +++ b/docs/api/tools.rst @@ -13,3 +13,4 @@ Tools tools.signal_processing tools.data_transfers tools.nwb_helpers + tools.aws diff --git a/docs/user_guide/aws_demo.rst b/docs/user_guide/aws_demo.rst new file mode 100644 index 000000000..7002b7057 --- /dev/null +++ b/docs/user_guide/aws_demo.rst @@ -0,0 +1,136 @@ +NeuroConv AWS Demo +------------------ + +The :ref:`neuroconv.tools.aws ` submodule provides a number of tools for deploying NWB conversions +within AWS cloud services. These tools are primarily for facilitating source data transfers from cloud storage +sources to AWS, where the NWB conversion takes place, following by immediate direct upload to the `Dandi Archive `_. 
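+
+These helpers live in the ``neuroconv.tools.aws`` submodule, so you will likely need NeuroConv installed with its
+AWS extra before starting; the ``aws`` extra named below mirrors the ``pip install .[aws,test]`` step used by
+NeuroConv's own AWS test workflows. The short sketch below is only a sanity check that the helpers are importable.
+
+.. code:: python
+
+    # Install once from a shell: pip install "neuroconv[aws]"
+    # Then confirm the AWS Batch helpers used throughout this demo can be imported:
+    from neuroconv.tools.aws import (
+        deploy_neuroconv_batch_job,
+        rclone_transfer_batch_job,
+        submit_aws_batch_job,
+    )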
+ +The following is an explicit demonstration of how to use these to create a pipeline to run a remote data conversion. + +This tutorial relies on setting up several cloud-based aspects ahead of time: + +a. Download some of the GIN data from the main testing suite, see :ref:`example_data` for more +details. Specifically, you will need the ``spikeglx`` and ``phy`` folders. + +b. Have access to a `Google Drive `_ folder to mimic a typical remote storage +location. The example data from (a) only takes up about 20 MB of space, so ensure you have that available. In +practice, any `cloud storage provider that can be accessed via Rclone `_ can be used. + +c. Install `Rclone `_, run ``rclone config``, and follow all instructions while giving your +remote the name ``test_google_drive_remote``. This step is necessary to provide the necessary credentials to access +the Google Drive folder from other locations by creating a file called ``rclone.conf``. You can find the path to +file, which you will need for a later step, by running ``rclone config file``. + +d. Have access to an `AWS account `_. Then, from +the `AWS console `_, sign in and navigate to the "IAM" page. Here, you will +generate some credentials by creating a new user with programmatic access. Save your access key and secret key +somewhere safe (such as installing the `AWS CLI `_ and running ``aws configure`` +to store the values on your local device). + +e. Have access to an account on both the `staging/testing server `_ (you +will probably want one on the main archive as well, but please do not upload demonstration data to the primary +server). This request can take a few days for the admin team to process. Once you have access, you will need +to create a new Dandiset on the staging server and record the six-digit Dandiset ID. + +.. warning:: + + *Cloud costs*. While the operations deployed on your behalf by NeuroConv are optimized to the best extent we can, cloud services can still become expensive. Please be aware of the costs associated with running these services and ensure you have the necessary permissions and budget to run these operations. While NeuroConv makes every effort to ensure there are no stalled resources, it is ultimately your responsibility to monitor and manage these resources. We recommend checking the AWS dashboards regularly while running these operations, manually removing any spurious resources, and setting up billing alerts to ensure you do not exceed your budget. + +Then, to setup the remaining steps of the tutorial: + +1. In your Google Drive, make a new folder for this demo conversion named ``demo_neuroconv_aws`` at the outermost +level (not nested in any other folders). + +2. Create a file on your local device named ``demo_neuroconv_aws.yml`` with the following content: + +.. code-block:: yaml + + metadata: + NWBFile: + lab: My Lab + institution: My Institution + + data_interfaces: + ap: SpikeGLXRecordingInterface + phy: PhySortingInterface + + upload_to_dandiset: "< enter your six-digit Dandiset ID here >" + + experiments: + my_experiment: + metadata: + NWBFile: + session_description: My session. 
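+          # metadata defined here at the experiment level is applied to each of the sessions listed below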
+ + sessions: + - source_data: + ap: + file_path: spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.ap.bin + metadata: + NWBFile: + session_start_time: "2020-10-10T21:19:09+00:00" + Subject: + subject_id: "1" + sex: F + age: P35D + species: Mus musculus + - metadata: + NWBFile: + session_start_time: "2020-10-10T21:19:09+00:00" + Subject: + subject_id: "002" + sex: F + age: P35D + species: Mus musculus + source_data: + phy: + folder_path: phy/phy_example_0/ + + +3. Copy and paste the ``Noise4Sam_g0`` and ``phy_example_0`` folders from the :ref:`example_data` into this demo +folder so that you have the following structure... + +.. code:: + + demo_neuroconv_aws/ + ¦ demo_output/ + ¦ spikeglx/ + ¦ +-- Noise4Sam_g0/ + ¦ +-- ... # .nidq streams + ¦ ¦ +-- Noise4Sam_g0_imec0/ + ¦ ¦ +-- Noise4Sam_g0_t0.imec0.ap.bin + ¦ ¦ +-- Noise4Sam_g0_t0.imec0.ap.meta + ¦ ¦ +-- ... # .lf streams + ¦ phy/ + ¦ +-- phy_example_0/ + ¦ ¦ +-- ... # The various file contents from the example Phy folder + +4. Now run the following Python code to deploy the AWS Batch job: + +.. code:: python + + from neuroconv.tools.aws import deploy_neuroconv_batch_job + + rclone_command = ( + "rclone copy test_google_drive_remote:demo_neuroconv_aws /mnt/efs/source " + "--verbose --progress --config ./rclone.conf" + ) + + # Remember - you can find this via `rclone config file` + rclone_config_file_path = "/path/to/rclone.conf" + + yaml_specification_file_path = "/path/to/demo_neuroconv_aws.yml" + + job_name = "demo_deploy_neuroconv_batch_job" + efs_volume_name = "demo_deploy_neuroconv_batch_job" + deploy_neuroconv_batch_job( + rclone_command=rclone_command, + yaml_specification_file_path=yaml_specification_file_path, + job_name=job_name, + efs_volume_name=efs_volume_name, + rclone_config_file_path=rclone_config_file_path, + ) + +Voilà! If everything occurred successfully, you should eventually (~2-10 minutes) see the files uploaded to your +Dandiset on the staging server. You should also be able to monitor the resources running in the AWS Batch dashboard +as well as on the DynamoDB table. diff --git a/docs/user_guide/index.rst b/docs/user_guide/index.rst index 4077f49be..bf9aaf253 100644 --- a/docs/user_guide/index.rst +++ b/docs/user_guide/index.rst @@ -27,3 +27,4 @@ and synchronize data across multiple sources. 
backend_configuration yaml docker_demo + aws_demo From c4afad37a8600aed3c21fa73ce45fbb24ba82ea4 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Wed, 27 Nov 2024 20:18:35 -0500 Subject: [PATCH 22/28] [Cloud Deployment IVb] Rclone in AWS on EFS (#1085) Co-authored-by: Heberto Mayorquin --- .../{aws_tests.yml => generic_aws_tests.yml} | 7 +- .github/workflows/rclone_aws_tests.yml | 46 +++++ CHANGELOG.md | 9 + src/neuroconv/tools/aws/__init__.py | 3 +- .../tools/aws/_rclone_transfer_batch_job.py | 113 +++++++++++ .../tools/aws/_submit_aws_batch_job.py | 15 +- tests/docker_rclone_with_config_cli.py | 3 +- .../{aws_tools.py => aws_tools_tests.py} | 0 .../test_yaml/yaml_aws_tools_tests.py | 176 ++++++++++++++++++ 9 files changed, 361 insertions(+), 11 deletions(-) rename .github/workflows/{aws_tests.yml => generic_aws_tests.yml} (88%) create mode 100644 .github/workflows/rclone_aws_tests.yml create mode 100644 src/neuroconv/tools/aws/_rclone_transfer_batch_job.py rename tests/test_minimal/test_tools/{aws_tools.py => aws_tools_tests.py} (100%) create mode 100644 tests/test_on_data/test_yaml/yaml_aws_tools_tests.py diff --git a/.github/workflows/aws_tests.yml b/.github/workflows/generic_aws_tests.yml similarity index 88% rename from .github/workflows/aws_tests.yml rename to .github/workflows/generic_aws_tests.yml index 0ecbb4d7b..20886a178 100644 --- a/.github/workflows/aws_tests.yml +++ b/.github/workflows/generic_aws_tests.yml @@ -11,7 +11,6 @@ concurrency: # Cancel previous workflows on the same pull request env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} jobs: run: @@ -36,8 +35,8 @@ jobs: git config --global user.email "CI@example.com" git config --global user.name "CI Almighty" - - name: Install full requirements + - name: Install AWS requirements run: pip install .[aws,test] - - name: Run subset of tests that use S3 live services - run: pytest -rsx -n auto tests/test_minimal/test_tools/aws_tools.py + - name: Run generic AWS tests + run: pytest -rsx -n auto tests/test_minimal/test_tools/aws_tools_tests.py diff --git a/.github/workflows/rclone_aws_tests.yml b/.github/workflows/rclone_aws_tests.yml new file mode 100644 index 000000000..bcfbeb5c7 --- /dev/null +++ b/.github/workflows/rclone_aws_tests.yml @@ -0,0 +1,46 @@ +name: Rclone AWS Tests +on: + schedule: + - cron: "0 16 * * 2" # Weekly at noon on Tuesday + workflow_dispatch: + +concurrency: # Cancel previous workflows on the same pull request + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + RCLONE_DRIVE_ACCESS_TOKEN: ${{ secrets.RCLONE_DRIVE_ACCESS_TOKEN }} + RCLONE_DRIVE_REFRESH_TOKEN: ${{ secrets.RCLONE_DRIVE_REFRESH_TOKEN }} + RCLONE_EXPIRY_TOKEN: ${{ secrets.RCLONE_EXPIRY_TOKEN }} + DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} + +jobs: + run: + name: ${{ matrix.os }} Python ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.12"] + os: [ubuntu-latest] + steps: + - uses: actions/checkout@v4 + - run: git fetch --prune --unshallow --tags + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Global Setup + run: | + python -m pip install -U pip # Official recommended 
way + git config --global user.email "CI@example.com" + git config --global user.name "CI Almighty" + + - name: Install AWS requirements + run: pip install .[aws,test] + + - name: Run RClone on AWS tests + run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_aws_tools_tests.py diff --git a/CHANGELOG.md b/CHANGELOG.md index c0c3bd729..a37093e39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Upcoming +## Features +* Added the `rclone_transfer_batch_job` helper function for executing Rclone data transfers in AWS Batch jobs. [PR #1085](https://github.com/catalystneuro/neuroconv/pull/1085) + + + +## v0.6.4 + ## Deprecations * Completely removed compression settings from most places [PR #1126](https://github.com/catalystneuro/neuroconv/pull/1126) @@ -37,6 +44,8 @@ * Avoid running link test when the PR is on draft [PR #1093](https://github.com/catalystneuro/neuroconv/pull/1093) * Centralize gin data preparation in a github action [PR #1095](https://github.com/catalystneuro/neuroconv/pull/1095) + + # v0.6.4 (September 17, 2024) ## Bug Fixes diff --git a/src/neuroconv/tools/aws/__init__.py b/src/neuroconv/tools/aws/__init__.py index d40ddb2dd..88144fb01 100644 --- a/src/neuroconv/tools/aws/__init__.py +++ b/src/neuroconv/tools/aws/__init__.py @@ -1,3 +1,4 @@ from ._submit_aws_batch_job import submit_aws_batch_job +from ._rclone_transfer_batch_job import rclone_transfer_batch_job -__all__ = ["submit_aws_batch_job"] +__all__ = ["submit_aws_batch_job", "rclone_transfer_batch_job"] diff --git a/src/neuroconv/tools/aws/_rclone_transfer_batch_job.py b/src/neuroconv/tools/aws/_rclone_transfer_batch_job.py new file mode 100644 index 000000000..65bef7824 --- /dev/null +++ b/src/neuroconv/tools/aws/_rclone_transfer_batch_job.py @@ -0,0 +1,113 @@ +"""Collection of helper functions for assessing and performing automated data transfers related to AWS.""" + +import warnings +from typing import Optional + +from pydantic import FilePath, validate_call + +from ._submit_aws_batch_job import submit_aws_batch_job + + +@validate_call +def rclone_transfer_batch_job( + *, + rclone_command: str, + job_name: str, + efs_volume_name: str, + rclone_config_file_path: Optional[FilePath] = None, + status_tracker_table_name: str = "neuroconv_batch_status_tracker", + compute_environment_name: str = "neuroconv_batch_environment", + job_queue_name: str = "neuroconv_batch_queue", + job_definition_name: Optional[str] = None, + minimum_worker_ram_in_gib: int = 4, + minimum_worker_cpus: int = 4, + submission_id: Optional[str] = None, + region: Optional[str] = None, +) -> dict[str, str]: + """ + Submit a job to AWS Batch for processing. + + Requires AWS credentials saved to files in the `~/.aws/` folder or set as environment variables. + + Parameters + ---------- + rclone_command : str + The command to pass directly to Rclone running on the EC2 instance. + E.g.: "rclone copy my_drive:testing_rclone /mnt/efs" + Must move data from or to '/mnt/efs'. + job_name : str + The name of the job to submit. + efs_volume_name : str + The name of an EFS volume to be created and attached to the job. + The path exposed to the container will always be `/mnt/efs`. + rclone_config_file_path : FilePath, optional + The path to the Rclone configuration file to use for the job. + If unspecified, method will attempt to find the file in `~/.rclone` and will raise an error if it cannot. + status_tracker_table_name : str, default: "neuroconv_batch_status_tracker" + The name of the DynamoDB table to use for tracking job status. 
+ compute_environment_name : str, default: "neuroconv_batch_environment" + The name of the compute environment to use for the job. + job_queue_name : str, default: "neuroconv_batch_queue" + The name of the job queue to use for the job. + job_definition_name : str, optional + The name of the job definition to use for the job. + If unspecified, a name starting with 'neuroconv_batch_' will be generated. + minimum_worker_ram_in_gib : int, default: 4 + The minimum amount of base worker memory required to run this job. + Determines the EC2 instance type selected by the automatic 'best fit' selector. + Recommended to be several GiB to allow comfortable buffer space for data chunk iterators. + minimum_worker_cpus : int, default: 4 + The minimum number of CPUs required to run this job. + A minimum of 4 is required, even if only one will be used in the actual process. + submission_id : str, optional + The unique ID to pair with this job submission when tracking the status via DynamoDB. + Defaults to a random UUID4. + region : str, optional + The AWS region to use for the job. + If not provided, we will attempt to load the region from your local AWS configuration. + If that file is not found on your system, we will default to "us-east-2", the location of the DANDI Archive. + + Returns + ------- + info : dict + A dictionary containing information about this AWS Batch job. + + info["job_submission_info"] is the return value of `boto3.client.submit_job` which contains the job ID. + info["table_submission_info"] is the initial row data inserted into the DynamoDB status tracking table. + """ + docker_image = "ghcr.io/catalystneuro/rclone_with_config:latest" + + if "/mnt/efs" not in rclone_command: + message = ( + f"The Rclone command '{rclone_command}' does not contain a reference to '/mnt/efs'. " + "Without utilizing the EFS mount, the instance is unlikely to have enough local disk space." + ) + warnings.warn(message=message, stacklevel=2) + + rclone_config_file_path = rclone_config_file_path or pathlib.Path.home() / ".rclone" / "rclone.conf" + if not rclone_config_file_path.exists(): + raise FileNotFoundError( + f"Rclone configuration file not found at: {rclone_config_file_path}! " + "Please check that `rclone config` successfully created the file." 
+ ) + with open(file=rclone_config_file_path, mode="r") as io: + rclone_config_file_stream = io.read() + + region = region or "us-east-2" + + info = submit_aws_batch_job( + job_name=job_name, + docker_image=docker_image, + environment_variables={"RCLONE_CONFIG": rclone_config_file_stream, "RCLONE_COMMAND": rclone_command}, + efs_volume_name=efs_volume_name, + status_tracker_table_name=status_tracker_table_name, + compute_environment_name=compute_environment_name, + job_queue_name=job_queue_name, + job_definition_name=job_definition_name, + minimum_worker_ram_in_gib=minimum_worker_ram_in_gib, + minimum_worker_cpus=minimum_worker_cpus, + submission_id=submission_id, + region=region, + ) + + return info diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index 9e3ba0488..748f25399 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -171,7 +171,9 @@ def submit_aws_batch_job( job_dependencies = job_dependencies or [] container_overrides = dict() if environment_variables is not None: - container_overrides["environment"] = [{key: value} for key, value in environment_variables.items()] + container_overrides["environment"] = [ + {"name": key, "value": value} for key, value in environment_variables.items() + ] if commands is not None: container_overrides["command"] = commands @@ -294,7 +296,7 @@ def _ensure_compute_environment_exists( The AWS Batch client to use for the job. max_retries : int, default: 12 If the compute environment does not already exist, then this is the maximum number of times to synchronously - check for its successful creation before erroring. + check for its successful creation before raising an error. This is essential for a clean setup of the entire pipeline, or else later steps might error because they tried to launch before the compute environment was ready. 
""" @@ -530,7 +532,11 @@ def _generate_job_definition_name( """ docker_tags = docker_image.split(":")[1:] docker_tag = docker_tags[0] if len(docker_tags) > 1 else None - parsed_docker_image_name = docker_image.replace(":", "-") # AWS Batch does not allow colons in job definition names + + # AWS Batch does not allow colons, slashes, or periods in job definition names + parsed_docker_image_name = str(docker_image) + for disallowed_character in [":", r"/", "."]: + parsed_docker_image_name = parsed_docker_image_name.replace(disallowed_character, "-") job_definition_name = f"neuroconv_batch" job_definition_name += f"_{parsed_docker_image_name}-image" @@ -540,7 +546,6 @@ def _generate_job_definition_name( job_definition_name += f"_{efs_id}" if docker_tag is None or docker_tag == "latest": date = datetime.now().strftime("%Y-%m-%d") - job_definition_name += f"_created-on-{date}" return job_definition_name @@ -641,7 +646,7 @@ def _ensure_job_definition_exists_and_get_arn( ] mountPoints = [{"containerPath": "/mnt/efs/", "readOnly": False, "sourceVolume": "neuroconv_batch_efs_mounted"}] - # batch_client.register_job_definition() is not synchronous and so we need to wait a bit afterwards + # batch_client.register_job_definition is not synchronous and so we need to wait a bit afterwards batch_client.register_job_definition( jobDefinitionName=job_definition_name, type="container", diff --git a/tests/docker_rclone_with_config_cli.py b/tests/docker_rclone_with_config_cli.py index ed472bdf2..9b1e265dd 100644 --- a/tests/docker_rclone_with_config_cli.py +++ b/tests/docker_rclone_with_config_cli.py @@ -61,7 +61,8 @@ def test_direct_usage_of_rclone_with_config(self): os.environ["RCLONE_CONFIG"] = rclone_config_file_stream os.environ["RCLONE_COMMAND"] = ( - f"rclone copy test_google_drive_remote:testing_rclone_with_config {self.test_folder} --verbose --progress --config ./rclone.conf" + f"rclone copy test_google_drive_remote:testing_rclone_with_config {self.test_folder} " + "--verbose --progress --config ./rclone.conf" ) command = ( diff --git a/tests/test_minimal/test_tools/aws_tools.py b/tests/test_minimal/test_tools/aws_tools_tests.py similarity index 100% rename from tests/test_minimal/test_tools/aws_tools.py rename to tests/test_minimal/test_tools/aws_tools_tests.py diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py new file mode 100644 index 000000000..7ea49e644 --- /dev/null +++ b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py @@ -0,0 +1,176 @@ +import datetime +import os +import time +import unittest + +import boto3 + +from neuroconv.tools.aws import rclone_transfer_batch_job + +from ..setup_paths import OUTPUT_PATH + +_RETRY_STATES = ["RUNNABLE", "PENDING", "STARTING", "RUNNING"] + + +class TestRcloneTransferBatchJob(unittest.TestCase): + """ + To allow this test to work, the developer must create a folder on the outer level of their personal Google Drive + called 'testing_rclone_spikegl_and_phy' with the following structure: + + testing_rclone_spikeglx_and_phy + ├── ci_tests + ├──── spikeglx + ├────── Noise4Sam_g0 + ├──── phy + ├────── phy_example_0 + + Where 'Noise4Sam' is from the 'spikeglx/Noise4Sam_g0' GIN ephys dataset and 'phy_example_0' is likewise from the + 'phy' folder of the same dataset. + + Then the developer must install Rclone and call `rclone config` to generate tokens in their own `rclone.conf` file. + The developer can easily find the location of the config file on their system using `rclone config file`. 
+ """ + + test_folder = OUTPUT_PATH / "aws_rclone_tests" + test_config_file_path = test_folder / "rclone.conf" + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + region = "us-east-2" + + def setUp(self): + self.test_folder.mkdir(exist_ok=True) + + # Pretend as if .conf file already exists on the system (created via interactive `rclone config` command) + token_dictionary = dict( + access_token=os.environ["RCLONE_DRIVE_ACCESS_TOKEN"], + token_type="Bearer", + refresh_token=os.environ["RCLONE_DRIVE_REFRESH_TOKEN"], + expiry=os.environ["RCLONE_EXPIRY_TOKEN"], + ) + token_string = str(token_dictionary).replace("'", '"').replace(" ", "") + rclone_config_contents = [ + "[test_google_drive_remote]\n", + "type = drive\n", + "scope = drive\n", + f"token = {token_string}\n", + "team_drive = \n", + "\n", + ] + with open(file=self.test_config_file_path, mode="w") as io: + io.writelines(rclone_config_contents) + + self.efs_client = boto3.client( + service_name="efs", + region_name=self.region, + aws_access_key_id=self.aws_access_key_id, + aws_secret_access_key=self.aws_secret_access_key, + ) + + def tearDown(self): + efs_client = self.efs_client + + # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume + # TODO: cleanup job definitions? (since built daily) + mount_targets = efs_client.describe_mount_targets(FileSystemId=self.efs_id) + for mount_target in mount_targets["MountTargets"]: + efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) + + time.sleep(60) + efs_client.delete_file_system(FileSystemId=self.efs_id) + + def test_rclone_transfer_batch_job(self): + region = self.region + aws_access_key_id = self.aws_access_key_id + aws_secret_access_key = self.aws_secret_access_key + + dynamodb_resource = boto3.resource( + service_name="dynamodb", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + batch_client = boto3.client( + service_name="batch", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + efs_client = self.efs_client + + rclone_command = ( + "rclone copy test_google_drive_remote:testing_rclone_spikeglx_and_phy /mnt/efs " + "--verbose --progress --config ./rclone.conf" # TODO: should just include this in helper function? 
+ ) + rclone_config_file_path = self.test_config_file_path + + today = datetime.datetime.now().date().isoformat() + job_name = f"test_rclone_transfer_batch_job_{today}" + efs_volume_name = "test_rclone_transfer_batch_efs" + + info = rclone_transfer_batch_job( + rclone_command=rclone_command, + job_name=job_name, + efs_volume_name=efs_volume_name, + rclone_config_file_path=rclone_config_file_path, + ) + + # Wait for AWS to process the job + time.sleep(60) + + job_id = info["job_submission_info"]["jobId"] + job = None + max_retries = 10 + retry = 0 + while retry < max_retries: + job_description_response = batch_client.describe_jobs(jobs=[job_id]) + assert job_description_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + jobs = job_description_response["jobs"] + assert len(jobs) == 1 + + job = jobs[0] + + if job["status"] in _RETRY_STATES: + retry += 1 + time.sleep(60) + else: + break + + # Check EFS specific details + efs_volumes = efs_client.describe_file_systems() + matching_efs_volumes = [ + file_system + for file_system in efs_volumes["FileSystems"] + for tag in file_system["Tags"] + if tag["Key"] == "Name" and tag["Value"] == efs_volume_name + ] + assert len(matching_efs_volumes) == 1 + efs_volume = matching_efs_volumes[0] + self.efs_id = efs_volume["FileSystemId"] + + # Check normal job completion + assert job["jobName"] == job_name + assert "neuroconv_batch_queue" in job["jobQueue"] + assert "fs-" in job["jobDefinition"] + assert job["status"] == "SUCCEEDED" + + status_tracker_table_name = "neuroconv_batch_status_tracker" + table = dynamodb_resource.Table(name=status_tracker_table_name) + table_submission_id = info["table_submission_info"]["id"] + + table_item_response = table.get_item(Key={"id": table_submission_id}) + assert table_item_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + table_item = table_item_response["Item"] + assert table_item["job_name"] == job_name + assert table_item["job_id"] == job_id + assert table_item["status"] == "Job submitted..." + + table.update_item( + Key={"id": table_submission_id}, + AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed - cleaning up..."}}, + ) + + table.update_item( + Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} + ) From 54e6ea9a54a09b06400afabbd61069423c52059c Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Fri, 6 Dec 2024 13:01:15 -0600 Subject: [PATCH 23/28] Fix live-services tests (dandi) on windows (#1151) --- CHANGELOG.md | 11 +- .../test_tools/dandi_transfer_tools.py | 111 +++++++----------- 2 files changed, 48 insertions(+), 74 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a37093e39..5dbb44ec2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,4 @@ -# Upcoming - -## Features -* Added the `rclone_transfer_batch_job` helper function for executing Rclone data transfers in AWS Batch jobs. [PR #1085](https://github.com/catalystneuro/neuroconv/pull/1085) - - - -## v0.6.4 +# v0.6.6 (Upcoming) ## Deprecations * Completely removed compression settings from most places [PR #1126](https://github.com/catalystneuro/neuroconv/pull/1126) @@ -18,9 +11,11 @@ * Propagate the `unit_electrode_indices` argument from the spikeinterface tools to `BaseSortingExtractorInterface`. 
This allows users to map units to the electrode table when adding sorting data [PR #1124](https://github.com/catalystneuro/neuroconv/pull/1124) * Imaging interfaces have a new conversion option `always_write_timestamps` that can be used to force writing timestamps even if neuroconv's heuristics indicates regular sampling rate [PR #1125](https://github.com/catalystneuro/neuroconv/pull/1125) * Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140) +* Added the `rclone_transfer_batch_job` helper function for executing Rclone data transfers in AWS Batch jobs. [PR #1085](https://github.com/catalystneuro/neuroconv/pull/1085) ## Improvements * Use mixing tests for ecephy's mocks [PR #1136](https://github.com/catalystneuro/neuroconv/pull/1136) +* Use pytest format for dandi tests to avoid window permission error on teardown [PR #1151](https://github.com/catalystneuro/neuroconv/pull/1151) # v0.6.5 (November 1, 2024) diff --git a/tests/test_minimal/test_tools/dandi_transfer_tools.py b/tests/test_minimal/test_tools/dandi_transfer_tools.py index df4226d10..da35725a0 100644 --- a/tests/test_minimal/test_tools/dandi_transfer_tools.py +++ b/tests/test_minimal/test_tools/dandi_transfer_tools.py @@ -1,13 +1,9 @@ import os import sys from datetime import datetime -from pathlib import Path from platform import python_version as get_python_version -from shutil import rmtree -from tempfile import mkdtemp import pytest -from hdmf.testing import TestCase from pynwb import NWBHDF5IO from neuroconv.tools.data_transfers import automatic_dandi_upload @@ -24,80 +20,63 @@ not HAVE_DANDI_KEY, reason="You must set your DANDI_API_KEY to run this test!", ) -class TestAutomaticDANDIUpload(TestCase): - def setUp(self): - self.tmpdir = Path(mkdtemp()) - self.nwb_folder_path = self.tmpdir / "test_nwb" - self.nwb_folder_path.mkdir() - metadata = get_default_nwbfile_metadata() - metadata["NWBFile"].update( - session_start_time=datetime.now().astimezone(), - session_id=f"test-automatic-upload-{sys.platform}-{get_python_version().replace('.', '-')}", - ) - metadata.update(Subject=dict(subject_id="foo", species="Mus musculus", age="P1D", sex="U")) - with NWBHDF5IO(path=self.nwb_folder_path / "test_nwb_1.nwb", mode="w") as io: - io.write(make_nwbfile_from_metadata(metadata=metadata)) +def test_automatic_dandi_upload(tmp_path): + nwb_folder_path = tmp_path / "test_nwb" + nwb_folder_path.mkdir() + metadata = get_default_nwbfile_metadata() + metadata["NWBFile"].update( + session_start_time=datetime.now().astimezone(), + session_id=f"test-automatic-upload-{sys.platform}-{get_python_version().replace('.', '-')}", + ) + metadata.update(Subject=dict(subject_id="foo", species="Mus musculus", age="P1D", sex="U")) + with NWBHDF5IO(path=nwb_folder_path / "test_nwb_1.nwb", mode="w") as io: + io.write(make_nwbfile_from_metadata(metadata=metadata)) - def tearDown(self): - rmtree(self.tmpdir) - - def test_automatic_dandi_upload(self): - automatic_dandi_upload(dandiset_id="200560", nwb_folder_path=self.nwb_folder_path, staging=True) + automatic_dandi_upload(dandiset_id="200560", nwb_folder_path=nwb_folder_path, staging=True) @pytest.mark.skipif( not HAVE_DANDI_KEY, reason="You must set your DANDI_API_KEY to run this test!", ) -class TestAutomaticDANDIUploadNonParallel(TestCase): - def setUp(self): - self.tmpdir = Path(mkdtemp()) - self.nwb_folder_path = self.tmpdir / "test_nwb" - self.nwb_folder_path.mkdir() - metadata = get_default_nwbfile_metadata() - metadata["NWBFile"].update( - 
session_start_time=datetime.now().astimezone(), - session_id=f"test-automatic-upload-{sys.platform}-{get_python_version().replace('.', '-')}-non-parallel", - ) - metadata.update(Subject=dict(subject_id="foo", species="Mus musculus", age="P1D", sex="U")) - with NWBHDF5IO(path=self.nwb_folder_path / "test_nwb_2.nwb", mode="w") as io: - io.write(make_nwbfile_from_metadata(metadata=metadata)) - - def tearDown(self): - rmtree(self.tmpdir) +def test_automatic_dandi_upload_non_parallel(tmp_path): + nwb_folder_path = tmp_path / "test_nwb" + nwb_folder_path.mkdir() + metadata = get_default_nwbfile_metadata() + metadata["NWBFile"].update( + session_start_time=datetime.now().astimezone(), + session_id=(f"test-automatic-upload-{sys.platform}-" f"{get_python_version().replace('.', '-')}-non-parallel"), + ) + metadata.update(Subject=dict(subject_id="foo", species="Mus musculus", age="P1D", sex="U")) + with NWBHDF5IO(path=nwb_folder_path / "test_nwb_2.nwb", mode="w") as io: + io.write(make_nwbfile_from_metadata(metadata=metadata)) - def test_automatic_dandi_upload_non_parallel(self): - automatic_dandi_upload( - dandiset_id="200560", nwb_folder_path=self.nwb_folder_path, staging=True, number_of_jobs=1 - ) + automatic_dandi_upload(dandiset_id="200560", nwb_folder_path=nwb_folder_path, staging=True, number_of_jobs=1) @pytest.mark.skipif( not HAVE_DANDI_KEY, reason="You must set your DANDI_API_KEY to run this test!", ) -class TestAutomaticDANDIUploadNonParallelNonThreaded(TestCase): - def setUp(self): - self.tmpdir = Path(mkdtemp()) - self.nwb_folder_path = self.tmpdir / "test_nwb" - self.nwb_folder_path.mkdir() - metadata = get_default_nwbfile_metadata() - metadata["NWBFile"].update( - session_start_time=datetime.now().astimezone(), - session_id=f"test-automatic-upload-{sys.platform}-{get_python_version().replace('.', '-')}-non-parallel-non-threaded", - ) - metadata.update(Subject=dict(subject_id="foo", species="Mus musculus", age="P1D", sex="U")) - with NWBHDF5IO(path=self.nwb_folder_path / "test_nwb_3.nwb", mode="w") as io: - io.write(make_nwbfile_from_metadata(metadata=metadata)) - - def tearDown(self): - rmtree(self.tmpdir) +def test_automatic_dandi_upload_non_parallel_non_threaded(tmp_path): + nwb_folder_path = tmp_path / "test_nwb" + nwb_folder_path.mkdir() + metadata = get_default_nwbfile_metadata() + metadata["NWBFile"].update( + session_start_time=datetime.now().astimezone(), + session_id=( + f"test-automatic-upload-{sys.platform}-" + f"{get_python_version().replace('.', '-')}-non-parallel-non-threaded" + ), + ) + metadata.update(Subject=dict(subject_id="foo", species="Mus musculus", age="P1D", sex="U")) + with NWBHDF5IO(path=nwb_folder_path / "test_nwb_3.nwb", mode="w") as io: + io.write(make_nwbfile_from_metadata(metadata=metadata)) - def test_automatic_dandi_upload_non_parallel_non_threaded(self): - automatic_dandi_upload( - dandiset_id="200560", - nwb_folder_path=self.nwb_folder_path, - staging=True, - number_of_jobs=1, - number_of_threads=1, - ) + automatic_dandi_upload( + dandiset_id="200560", + nwb_folder_path=nwb_folder_path, + staging=True, + number_of_jobs=1, + number_of_threads=1, + ) From 96dfdffc9a8a3fb9d9e1897fbfd16ad51a2f1994 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Fri, 6 Dec 2024 16:02:54 -0500 Subject: [PATCH 24/28] [Cloud Deployment IVc] Deploy NeuroConv in AWS with EFS (#1086) Co-authored-by: Heberto Mayorquin --- .../neuroconv_deployment_aws_tests.yml | 46 ++++ CHANGELOG.md | 2 + src/neuroconv/tools/aws/__init__.py | 7 +- 
.../tools/aws/_deploy_neuroconv_batch_job.py | 241 ++++++++++++++++++ .../tools/aws/_submit_aws_batch_job.py | 20 +- .../_yaml_conversion_specification.py | 8 +- .../neuroconv_deployment_aws_tools_tests.py | 167 ++++++++++++ .../test_yaml/yaml_aws_tools_tests.py | 5 +- 8 files changed, 482 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/neuroconv_deployment_aws_tests.yml create mode 100644 src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py create mode 100644 tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py diff --git a/.github/workflows/neuroconv_deployment_aws_tests.yml b/.github/workflows/neuroconv_deployment_aws_tests.yml new file mode 100644 index 000000000..64aae5ec9 --- /dev/null +++ b/.github/workflows/neuroconv_deployment_aws_tests.yml @@ -0,0 +1,46 @@ +name: NeuroConv Deployment AWS Tests +on: + schedule: + - cron: "0 16 * * 3" # Weekly at noon on Wednesday + workflow_dispatch: + +concurrency: # Cancel previous workflows on the same pull request + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + RCLONE_DRIVE_ACCESS_TOKEN: ${{ secrets.RCLONE_DRIVE_ACCESS_TOKEN }} + RCLONE_DRIVE_REFRESH_TOKEN: ${{ secrets.RCLONE_DRIVE_REFRESH_TOKEN }} + RCLONE_EXPIRY_TOKEN: ${{ secrets.RCLONE_EXPIRY_TOKEN }} + DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} + +jobs: + run: + name: ${{ matrix.os }} Python ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.12"] + os: [ubuntu-latest] + steps: + - uses: actions/checkout@v4 + - run: git fetch --prune --unshallow --tags + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Global Setup + run: | + python -m pip install -U pip # Official recommended way + git config --global user.email "CI@example.com" + git config --global user.name "CI Almighty" + + - name: Install AWS requirements + run: pip install .[aws,test] + + - name: Run NeuroConv Deployment on AWS tests + run: pytest -rsx -n auto tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 5dbb44ec2..31a0fd7db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ * Imaging interfaces have a new conversion option `always_write_timestamps` that can be used to force writing timestamps even if neuroconv's heuristics indicates regular sampling rate [PR #1125](https://github.com/catalystneuro/neuroconv/pull/1125) * Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140) * Added the `rclone_transfer_batch_job` helper function for executing Rclone data transfers in AWS Batch jobs. [PR #1085](https://github.com/catalystneuro/neuroconv/pull/1085) +* Added the `deploy_neuroconv_batch_job` helper function for deploying NeuroConv AWS Batch jobs. 
[PR #1086](https://github.com/catalystneuro/neuroconv/pull/1086) + ## Improvements * Use mixing tests for ecephy's mocks [PR #1136](https://github.com/catalystneuro/neuroconv/pull/1136) diff --git a/src/neuroconv/tools/aws/__init__.py b/src/neuroconv/tools/aws/__init__.py index 88144fb01..70a42cbf5 100644 --- a/src/neuroconv/tools/aws/__init__.py +++ b/src/neuroconv/tools/aws/__init__.py @@ -1,4 +1,9 @@ from ._submit_aws_batch_job import submit_aws_batch_job from ._rclone_transfer_batch_job import rclone_transfer_batch_job +from ._deploy_neuroconv_batch_job import deploy_neuroconv_batch_job -__all__ = ["submit_aws_batch_job", "rclone_transfer_batch_job"] +__all__ = [ + "submit_aws_batch_job", + "rclone_transfer_batch_job", + "deploy_neuroconv_batch_job", +] diff --git a/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py new file mode 100644 index 000000000..1df86d957 --- /dev/null +++ b/src/neuroconv/tools/aws/_deploy_neuroconv_batch_job.py @@ -0,0 +1,241 @@ +"""Collection of helper functions for deploying NeuroConv in EC2 Batch jobs on AWS.""" + +import os +import time +import uuid +import warnings +from typing import Optional + +import boto3 +from pydantic import FilePath, validate_call + +from ._rclone_transfer_batch_job import rclone_transfer_batch_job +from ._submit_aws_batch_job import submit_aws_batch_job + +_RETRY_STATES = ["RUNNABLE", "PENDING", "STARTING", "RUNNING"] + + +@validate_call +def deploy_neuroconv_batch_job( + *, + rclone_command: str, + yaml_specification_file_path: FilePath, + job_name: str, + efs_volume_name: str, + rclone_config_file_path: Optional[FilePath] = None, + status_tracker_table_name: str = "neuroconv_batch_status_tracker", + compute_environment_name: str = "neuroconv_batch_environment", + job_queue_name: str = "neuroconv_batch_queue", + job_definition_name: Optional[str] = None, + minimum_worker_ram_in_gib: int = 16, # Higher than previous recommendations for safer buffering room + minimum_worker_cpus: int = 4, + region: Optional[str] = None, +) -> dict[str, str]: + """ + Submit a job to AWS Batch for processing. + + Requires AWS credentials saved to files in the `~/.aws/` folder or set as environment variables. + + Parameters + ---------- + rclone_command : str + The command to pass directly to Rclone running on the EC2 instance. + E.g.: "rclone copy my_drive:testing_rclone /mnt/efs/source" + Must move data from or to '/mnt/efs/source'. + yaml_specification_file_path : FilePath + The path to the YAML file containing the NeuroConv specification. + job_name : str + The name of the job to submit. + efs_volume_name : str + The name of an EFS volume to be created and attached to the job. + The path exposed to the container will always be `/mnt/efs`. + rclone_config_file_path : FilePath, optional + The path to the Rclone configuration file to use for the job. + If unspecified, method will attempt to find the file in `~/.rclone` and will raise an error if it cannot. + status_tracker_table_name : str, default: "neuroconv_batch_status_tracker" + The name of the DynamoDB table to use for tracking job status. + compute_environment_name : str, default: "neuroconv_batch_environment" + The name of the compute environment to use for the job. + job_queue_name : str, default: "neuroconv_batch_queue" + The name of the job queue to use for the job. + job_definition_name : str, optional + The name of the job definition to use for the job. 
+ If unspecified, a name starting with 'neuroconv_batch_' will be generated. + minimum_worker_ram_in_gib : int, default: 4 + The minimum amount of base worker memory required to run this job. + Determines the EC2 instance type selected by the automatic 'best fit' selector. + Recommended to be several GiB to allow comfortable buffer space for data chunk iterators. + minimum_worker_cpus : int, default: 4 + The minimum number of CPUs required to run this job. + A minimum of 4 is required, even if only one will be used in the actual process. + region : str, optional + The AWS region to use for the job. + If not provided, we will attempt to load the region from your local AWS configuration. + If that file is not found on your system, we will default to "us-east-2", the location of the DANDI Archive. + + Returns + ------- + info : dict + A dictionary containing information about this AWS Batch job. + + info["rclone_job_submission_info"] is the return value of `neuroconv.tools.aws.rclone_transfer_batch_job`. + info["neuroconv_job_submission_info"] is the return value of `neuroconv.tools.aws.submit_job`. + """ + efs_volume_name = efs_volume_name or f"neuroconv_batch_efs_volume_{uuid.uuid4().hex[:4]}" + region = region or "us-east-2" + + if "/mnt/efs/source" not in rclone_command: + message = ( + f"The Rclone command '{rclone_command}' does not contain a reference to '/mnt/efs/source'. " + "Without utilizing the EFS mount, the instance is unlikely to have enough local disk space. " + "The subfolder 'source' is also required to eliminate ambiguity in the transfer process." + ) + raise ValueError(message) + + rclone_job_name = f"{job_name}_rclone_transfer" + rclone_job_submission_info = rclone_transfer_batch_job( + rclone_command=rclone_command, + job_name=rclone_job_name, + efs_volume_name=efs_volume_name, + rclone_config_file_path=rclone_config_file_path, + region=region, + ) + rclone_job_id = rclone_job_submission_info["job_submission_info"]["jobId"] + + # Give the EFS and other aspects time to spin up before submitting next dependent job + # (Otherwise, good chance that duplicate EFS will be created) + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + + batch_client = boto3.client( + service_name="batch", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + efs_client = boto3.client( + service_name="efs", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + + available_efs_volumes = efs_client.describe_file_systems() + matching_efs_volumes = [ + file_system + for file_system in available_efs_volumes["FileSystems"] + for tag in file_system["Tags"] + if tag["Key"] == "Name" and tag["Value"] == efs_volume_name + ] + max_iterations = 10 + iteration = 0 + while len(matching_efs_volumes) == 0 and iteration < max_iterations: + iteration += 1 + time.sleep(30) + + matching_efs_volumes = [ + file_system + for file_system in available_efs_volumes["FileSystems"] + for tag in file_system["Tags"] + if tag["Key"] == "Name" and tag["Value"] == efs_volume_name + ] + + if len(matching_efs_volumes) == 0: + message = f"Unable to create EFS volume '{efs_volume_name}' after {max_iterations} attempts!" 
+ raise ValueError(message) + + docker_image = "ghcr.io/catalystneuro/neuroconv_yaml_variable:latest" + + with open(file=yaml_specification_file_path, mode="r") as io: + yaml_specification_file_stream = io.read() + + neuroconv_job_name = f"{job_name}_neuroconv_deployment" + job_dependencies = [{"jobId": rclone_job_id, "type": "SEQUENTIAL"}] + neuroconv_job_submission_info = submit_aws_batch_job( + job_name=neuroconv_job_name, + docker_image=docker_image, + environment_variables={ + "NEUROCONV_YAML": yaml_specification_file_stream, + "NEUROCONV_DATA_PATH": "/mnt/efs/source", + # TODO: would prefer this to use subfolders for source and output, but need logic for YAML + # related code to create them if missing (hard to send EFS this command directly) + # (the code was included in this PR, but a release cycle needs to complete for the docker images before + # it can be used here) + # "NEUROCONV_OUTPUT_PATH": "/mnt/efs/output", + "NEUROCONV_OUTPUT_PATH": "/mnt/efs", + }, + efs_volume_name=efs_volume_name, + job_dependencies=job_dependencies, + status_tracker_table_name=status_tracker_table_name, + compute_environment_name=compute_environment_name, + job_queue_name=job_queue_name, + job_definition_name=job_definition_name, + minimum_worker_ram_in_gib=minimum_worker_ram_in_gib, + minimum_worker_cpus=minimum_worker_cpus, + region=region, + ) + + info = { + "rclone_job_submission_info": rclone_job_submission_info, + "neuroconv_job_submission_info": neuroconv_job_submission_info, + } + + # TODO: would be better to spin up third dependent job to clean up EFS volume after neuroconv job completes + neuroconv_job_id = neuroconv_job_submission_info["job_submission_info"]["jobId"] + job = None + max_retries = 60 * 12 # roughly 12 hours max runtime (aside from internet loss) for checking cleanup + sleep_time = 60 # 1 minute + retry = 0.0 + time.sleep(sleep_time) + while retry < max_retries: + job_description_response = batch_client.describe_jobs(jobs=[neuroconv_job_id]) + if job_description_response["ResponseMetadata"]["HTTPStatusCode"] == 200: + # sleep but only increment retry by a small amount + # (really should only apply if internet connection is temporarily lost) + retry += 0.1 + time.sleep(sleep_time) + + job = job_description_response["jobs"][0] + if job["status"] in _RETRY_STATES: + retry += 1.0 + time.sleep(sleep_time) + elif job["status"] == "SUCCEEDED": + break + + if retry >= max_retries: + message = ( + "Maximum retries reached for checking job completion for automatic EFS cleanup! " + "Please delete the EFS volume manually." + ) + warnings.warn(message=message, stacklevel=2) + + return info + + # Cleanup EFS after job is complete - must clear mount targets first, then wait before deleting the volume + efs_volumes = efs_client.describe_file_systems() + matching_efs_volumes = [ + file_system + for file_system in efs_volumes["FileSystems"] + for tag in file_system["Tags"] + if tag["Key"] == "Name" and tag["Value"] == efs_volume_name + ] + if len(matching_efs_volumes) != 1: + message = ( + f"Expected to find exactly one EFS volume with name '{efs_volume_name}', " + f"but found {len(matching_efs_volumes)}\n\n{matching_efs_volumes=}\n\n!" + "You will have to delete these manually." 
+ ) + warnings.warn(message=message, stacklevel=2) + + return info + + efs_volume = matching_efs_volumes[0] + efs_id = efs_volume["FileSystemId"] + mount_targets = efs_client.describe_mount_targets(FileSystemId=efs_id) + for mount_target in mount_targets["MountTargets"]: + efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"]) + + time.sleep(sleep_time) + efs_client.delete_file_system(FileSystemId=efs_id) + + return info diff --git a/src/neuroconv/tools/aws/_submit_aws_batch_job.py b/src/neuroconv/tools/aws/_submit_aws_batch_job.py index 748f25399..cae25f3ce 100644 --- a/src/neuroconv/tools/aws/_submit_aws_batch_job.py +++ b/src/neuroconv/tools/aws/_submit_aws_batch_job.py @@ -464,11 +464,14 @@ def _create_or_get_efs_id( if tag["Key"] == "Name" and tag["Value"] == efs_volume_name ] - if len(matching_efs_volumes) > 1: + if len(matching_efs_volumes) == 1: efs_volume = matching_efs_volumes[0] efs_id = efs_volume["FileSystemId"] return efs_id + elif len(matching_efs_volumes) > 1: + message = f"Multiple EFS volumes with the name '{efs_volume_name}' were found!\n\n{matching_efs_volumes=}\n" + raise ValueError(message) # Existing volume not found - must create a fresh one and set mount targets on it efs_volume = efs_client.create_file_system( @@ -506,7 +509,7 @@ def _create_or_get_efs_id( return efs_id -def _generate_job_definition_name( +def generate_job_definition_name( *, docker_image: str, minimum_worker_ram_in_gib: int, @@ -515,9 +518,7 @@ def _generate_job_definition_name( ) -> str: # pragma: no cover """ Generate a job definition name for the AWS Batch job. - Note that Docker images don't strictly require a tag to be pulled or used - 'latest' is always used by default. - Parameters ---------- docker_image : str @@ -529,15 +530,13 @@ def _generate_job_definition_name( minimum_worker_cpus : int The minimum number of CPUs required to run this job. A minimum of 4 is required, even if only one will be used in the actual process. + efs_id : Optional[str] + The ID of the EFS filesystem to mount, if any. 
""" - docker_tags = docker_image.split(":")[1:] - docker_tag = docker_tags[0] if len(docker_tags) > 1 else None - # AWS Batch does not allow colons, slashes, or periods in job definition names parsed_docker_image_name = str(docker_image) - for disallowed_character in [":", r"/", "."]: + for disallowed_character in [":", "/", r"/", "."]: parsed_docker_image_name = parsed_docker_image_name.replace(disallowed_character, "-") - job_definition_name = f"neuroconv_batch" job_definition_name += f"_{parsed_docker_image_name}-image" job_definition_name += f"_{minimum_worker_ram_in_gib}-GiB-RAM" @@ -546,7 +545,6 @@ def _generate_job_definition_name( job_definition_name += f"_{efs_id}" if docker_tag is None or docker_tag == "latest": date = datetime.now().strftime("%Y-%m-%d") - return job_definition_name @@ -644,7 +642,7 @@ def _ensure_job_definition_exists_and_get_arn( }, }, ] - mountPoints = [{"containerPath": "/mnt/efs/", "readOnly": False, "sourceVolume": "neuroconv_batch_efs_mounted"}] + mountPoints = [{"containerPath": "/mnt/efs", "readOnly": False, "sourceVolume": "neuroconv_batch_efs_mounted"}] # batch_client.register_job_definition is not synchronous and so we need to wait a bit afterwards batch_client.register_job_definition( diff --git a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py index 10e33cbc8..0e2f05f74 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py +++ b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py @@ -73,10 +73,16 @@ def run_conversion_from_yaml( if data_folder_path is None: data_folder_path = Path(specification_file_path).parent + else: + data_folder_path = Path(data_folder_path) + data_folder_path.mkdir(exist_ok=True) + if output_folder_path is None: - output_folder_path = Path(specification_file_path).parent + output_folder_path = specification_file_path.parent else: output_folder_path = Path(output_folder_path) + output_folder_path.mkdir(exist_ok=True) + specification = load_dict_from_file(file_path=specification_file_path) schema_folder = Path(__file__).parent.parent.parent / "schemas" specification_schema = load_dict_from_file(file_path=schema_folder / "yaml_conversion_specification_schema.json") diff --git a/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py new file mode 100644 index 000000000..f58865d26 --- /dev/null +++ b/tests/test_on_data/test_yaml/neuroconv_deployment_aws_tools_tests.py @@ -0,0 +1,167 @@ +import os +import pathlib +import time +import unittest + +import boto3 + +from neuroconv.tools.aws import deploy_neuroconv_batch_job + +from ..setup_paths import OUTPUT_PATH + +_RETRY_STATES = ["RUNNABLE", "PENDING", "STARTING", "RUNNING"] + + +class TestNeuroConvDeploymentBatchJob(unittest.TestCase): + """ + To allow this test to work, the developer must create a folder on the outer level of their personal Google Drive + called 'testing_rclone_spikegl_and_phy' with the following structure: + + testing_rclone_spikeglx_and_phy + ├── ci_tests + ├──── spikeglx + ├────── Noise4Sam_g0 + ├──── phy + ├────── phy_example_0 + + Where 'Noise4Sam' is from the 'spikeglx/Noise4Sam_g0' GIN ephys dataset and 'phy_example_0' is likewise from the + 'phy' folder of the same dataset. 
+ + Then the developer must install Rclone and call `rclone config` to generate tokens in their own `rclone.conf` file. + The developer can easily find the location of the config file on their system using `rclone config file`. + """ + + test_folder = OUTPUT_PATH / "aws_rclone_tests" + test_config_file_path = test_folder / "rclone.conf" + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + region = "us-east-2" + + def setUp(self): + self.test_folder.mkdir(exist_ok=True) + + # Pretend as if .conf file already exists on the system (created via interactive `rclone config` command) + token_dictionary = dict( + access_token=os.environ["RCLONE_DRIVE_ACCESS_TOKEN"], + token_type="Bearer", + refresh_token=os.environ["RCLONE_DRIVE_REFRESH_TOKEN"], + expiry=os.environ["RCLONE_EXPIRY_TOKEN"], + ) + token_string = str(token_dictionary).replace("'", '"').replace(" ", "") + rclone_config_contents = [ + "[test_google_drive_remote]\n", + "type = drive\n", + "scope = drive\n", + f"token = {token_string}\n", + "team_drive = \n", + "\n", + ] + with open(file=self.test_config_file_path, mode="w") as io: + io.writelines(rclone_config_contents) + + def test_deploy_neuroconv_batch_job(self): + region = "us-east-2" + aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) + aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) + + dynamodb_resource = boto3.resource( + service_name="dynamodb", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + batch_client = boto3.client( + service_name="batch", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + efs_client = boto3.client( + service_name="efs", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + # Assume no other tests of EFS volumes are fluctuating at the same time, otherwise make this more specific + efs_volumes_before = efs_client.describe_file_systems() + + rclone_command = ( + "rclone copy test_google_drive_remote:testing_rclone_spikeglx_and_phy/ci_tests /mnt/efs/source " + "--verbose --progress --config ./rclone.conf" # TODO: should just include this in helper function? 
+ ) + + testing_base_folder_path = pathlib.Path(__file__).parent.parent.parent + yaml_specification_file_path = ( + testing_base_folder_path + / "test_on_data" + / "test_yaml" + / "conversion_specifications" + / "GIN_conversion_specification.yml" + ) + + rclone_config_file_path = self.test_config_file_path + + job_name = "test_deploy_neuroconv_batch_job" + efs_volume_name = "test_deploy_neuroconv_batch_job" + all_info = deploy_neuroconv_batch_job( + rclone_command=rclone_command, + yaml_specification_file_path=yaml_specification_file_path, + job_name=job_name, + efs_volume_name=efs_volume_name, + rclone_config_file_path=rclone_config_file_path, + ) + + # Wait additional time for AWS to clean up resources + time.sleep(120) + + info = all_info["neuroconv_job_submission_info"] + job_id = info["job_submission_info"]["jobId"] + job = None + max_retries = 10 + retry = 0 + while retry < max_retries: + job_description_response = batch_client.describe_jobs(jobs=[job_id]) + assert job_description_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + jobs = job_description_response["jobs"] + assert len(jobs) == 1 + + job = jobs[0] + + if job["status"] in _RETRY_STATES: + retry += 1 + time.sleep(60) + else: + break + + # Check EFS cleaned up automatically + efs_volumes_after = efs_client.describe_file_systems() + assert len(efs_volumes_after["FileSystems"]) == len(efs_volumes_before["FileSystems"]) + + # Check normal job completion + expected_job_name = f"{job_name}_neuroconv_deployment" + assert job["jobName"] == expected_job_name + assert "neuroconv_batch_queue" in job["jobQueue"] + assert "fs-" in job["jobDefinition"] + assert job["status"] == "SUCCEEDED" + + status_tracker_table_name = "neuroconv_batch_status_tracker" + table = dynamodb_resource.Table(name=status_tracker_table_name) + table_submission_id = info["table_submission_info"]["id"] + + table_item_response = table.get_item(Key={"id": table_submission_id}) + assert table_item_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + table_item = table_item_response["Item"] + assert table_item["job_name"] == expected_job_name + assert table_item["job_id"] == job_id + assert table_item["status"] == "Job submitted..." 
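        # The two status updates that follow overwrite the DynamoDB tracker entry so the table
        # records that this test run completed and performed its own cleanup.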
+ + table.update_item( + Key={"id": table_submission_id}, + AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed - cleaning up..."}}, + ) + + table.update_item( + Key={"id": table_submission_id}, AttributeUpdates={"status": {"Action": "PUT", "Value": "Test passed."}} + ) diff --git a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py index 7ea49e644..e767e516b 100644 --- a/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py +++ b/tests/test_on_data/test_yaml/yaml_aws_tools_tests.py @@ -36,6 +36,7 @@ class TestRcloneTransferBatchJob(unittest.TestCase): aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID", None) aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) region = "us-east-2" + efs_id = None def setUp(self): self.test_folder.mkdir(exist_ok=True) @@ -66,7 +67,9 @@ def setUp(self): aws_secret_access_key=self.aws_secret_access_key, ) - def tearDown(self): + def tearDown(self) -> None: + if self.efs_id is None: + return None efs_client = self.efs_client # Cleanup EFS after testing is complete - must clear mount targets first, then wait before deleting the volume From 4ba1e827d373153344266f325b002f4b6dffcaad Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Mon, 9 Dec 2024 08:31:58 -0600 Subject: [PATCH 25/28] Remove soon to be deprecated jsonschema.RefResolver (#1133) --- CHANGELOG.md | 3 ++- pyproject.toml | 3 ++- .../_yaml_conversion_specification.py | 15 +++++++++++---- .../test_yaml_conversion_specification.py | 17 ++++++++++------- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31a0fd7db..c49e995b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,8 @@ # v0.6.6 (Upcoming) ## Deprecations -* Completely removed compression settings from most places [PR #1126](https://github.com/catalystneuro/neuroconv/pull/1126) +* Removed use of `jsonschema.RefResolver` as it will be deprecated from the jsonschema library [PR #1133](https://github.com/catalystneuro/neuroconv/pull/1133) +* Completely removed compression settings from most places[PR #1126](https://github.com/catalystneuro/neuroconv/pull/1126) ## Bug Fixes * datetime objects now can be validated as conversion options [#1139](https://github.com/catalystneuro/neuroconv/pull/1126) diff --git a/pyproject.toml b/pyproject.toml index 5efd432f5..a4f391512 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,8 @@ dependencies = [ "parse>=1.20.0", "click", "docstring-parser", - "packaging" # Issue 903 + "packaging", # Issue 903 + "referencing", ] diff --git a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py index 0e2f05f74..f8a4e8655 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py +++ b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py @@ -1,11 +1,11 @@ -import sys from importlib import import_module from pathlib import Path from typing import Optional import click -from jsonschema import RefResolver, validate +from jsonschema import validate from pydantic import DirectoryPath, FilePath +from referencing import Registry, Resource from ...nwbconverter import NWBConverter from ...utils import dict_deep_update, load_dict_from_file @@ -85,12 +85,19 @@ def run_conversion_from_yaml( specification = load_dict_from_file(file_path=specification_file_path) schema_folder = 
Path(__file__).parent.parent.parent / "schemas" + + # Load all required schemas specification_schema = load_dict_from_file(file_path=schema_folder / "yaml_conversion_specification_schema.json") - sys_uri_base = "file:/" if sys.platform.startswith("win32") else "file://" + metadata_schema = load_dict_from_file(file_path=schema_folder / "metadata_schema.json") + + # The yaml specification references the metadata schema, so we need to load it into the registry + registry = Registry().with_resource("metadata_schema.json", Resource.from_contents(metadata_schema)) + + # Validate using the registry validate( instance=specification, schema=specification_schema, - resolver=RefResolver(base_uri=sys_uri_base + str(schema_folder) + "/", referrer=specification_schema), + registry=registry, ) global_metadata = specification.get("metadata", dict()) diff --git a/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py b/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py index 61c71cf86..e46e25352 100644 --- a/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py +++ b/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py @@ -1,12 +1,12 @@ -import sys import unittest from datetime import datetime from pathlib import Path import pytest from hdmf.testing import TestCase -from jsonschema import RefResolver, validate +from jsonschema import validate from pynwb import NWBHDF5IO +from referencing import Registry, Resource from neuroconv import run_conversion_from_yaml from neuroconv.utils import load_dict_from_file @@ -27,16 +27,19 @@ def test_validate_example_specifications(fname): path_to_test_yml_files = Path(__file__).parent / "conversion_specifications" schema_folder = path_to_test_yml_files.parent.parent.parent.parent / "src" / "neuroconv" / "schemas" + + # Load schemas specification_schema = load_dict_from_file(file_path=schema_folder / "yaml_conversion_specification_schema.json") - sys_uri_base = "file://" - if sys.platform.startswith("win32"): - sys_uri_base = "file:/" + metadata_schema = load_dict_from_file(file_path=schema_folder / "metadata_schema.json") + + # The yaml specification references the metadata schema, so we need to load it into the registry + registry = Registry().with_resource("metadata_schema.json", Resource.from_contents(metadata_schema)) yaml_file_path = path_to_test_yml_files / fname validate( instance=load_dict_from_file(file_path=yaml_file_path), - schema=load_dict_from_file(file_path=schema_folder / "yaml_conversion_specification_schema.json"), - resolver=RefResolver(base_uri=sys_uri_base + str(schema_folder) + "/", referrer=specification_schema), + schema=specification_schema, + registry=registry, ) From 4b3172c25676eaebe3f3a388e41d4c37d91efd49 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 9 Dec 2024 12:06:45 -0500 Subject: [PATCH 26/28] Add DANDI upload to YAML spec (#1089) Co-authored-by: Heberto Mayorquin --- .github/workflows/deploy-tests.yml | 3 + .github/workflows/live-service-testing.yml | 16 +++++ CHANGELOG.md | 1 + .../yaml_conversion_specification_schema.json | 1 + .../_yaml_conversion_specification.py | 42 +++++++++++- tests/imports.py | 1 + ..._conversion_specification_dandi_upload.yml | 66 +++++++++++++++++++ .../test_yaml_conversion_specification.py | 1 + .../test_yaml/yaml_dandi_transfer_tools.py | 53 +++++++++++++++ 9 files changed, 182 insertions(+), 2 deletions(-) create mode 100644 
tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml create mode 100644 tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py diff --git a/.github/workflows/deploy-tests.yml b/.github/workflows/deploy-tests.yml index a18fe8310..a2e56b00a 100644 --- a/.github/workflows/deploy-tests.yml +++ b/.github/workflows/deploy-tests.yml @@ -69,6 +69,9 @@ jobs: if: ${{ needs.assess-file-changes.outputs.SOURCE_CHANGED == 'true' }} uses: ./.github/workflows/live-service-testing.yml secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + S3_GIN_BUCKET: ${{ secrets.S3_GIN_BUCKET }} DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} with: # Ternary operator: condition && value_if_true || value_if_false python-versions: ${{ github.event.pull_request.draft == true && '["3.9"]' || needs.load_python_and_os_versions.outputs.ALL_PYTHON_VERSIONS }} diff --git a/.github/workflows/live-service-testing.yml b/.github/workflows/live-service-testing.yml index 24eda7bc3..155438fb2 100644 --- a/.github/workflows/live-service-testing.yml +++ b/.github/workflows/live-service-testing.yml @@ -13,6 +13,12 @@ on: type: string secrets: + AWS_ACCESS_KEY_ID: + required: true + AWS_SECRET_ACCESS_KEY: + required: true + S3_GIN_BUCKET: + required: true DANDI_API_KEY: required: true @@ -45,7 +51,17 @@ jobs: - name: Install full requirements run: pip install .[test,full] + - name: Prepare data for tests + uses: ./.github/actions/load-data + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + s3-gin-bucket: ${{ secrets.S3_GIN_BUCKET }} + os: ${{ matrix.os }} + - name: Run subset of tests that use DANDI live services run: pytest -rsx -n auto tests/test_minimal/test_tools/dandi_transfer_tools.py + - name: Run subset of tests that use DANDI live services with YAML + run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py - name: Run subset of tests that use Globus live services run: pytest -rsx -n auto tests/test_minimal/test_tools/globus_transfer_tools.py diff --git a/CHANGELOG.md b/CHANGELOG.md index c49e995b9..ae105b907 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ * Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140) * Added the `rclone_transfer_batch_job` helper function for executing Rclone data transfers in AWS Batch jobs. [PR #1085](https://github.com/catalystneuro/neuroconv/pull/1085) * Added the `deploy_neuroconv_batch_job` helper function for deploying NeuroConv AWS Batch jobs. 
[PR #1086](https://github.com/catalystneuro/neuroconv/pull/1086) +* YAML specification files now accept an outer keyword `upload_to_dandiset="< six-digit ID >"` to automatically upload the produced NWB files to the DANDI archive [PR #1089](https://github.com/catalystneuro/neuroconv/pull/1089) ## Improvements diff --git a/src/neuroconv/schemas/yaml_conversion_specification_schema.json b/src/neuroconv/schemas/yaml_conversion_specification_schema.json index c6526803b..039a1cf48 100644 --- a/src/neuroconv/schemas/yaml_conversion_specification_schema.json +++ b/src/neuroconv/schemas/yaml_conversion_specification_schema.json @@ -8,6 +8,7 @@ "required": ["experiments"], "additionalProperties": false, "properties": { + "upload_to_dandiset": {"type": "string"}, "metadata": {"$ref": "./metadata_schema.json#"}, "conversion_options": {"type": "object"}, "data_interfaces": { diff --git a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py index f8a4e8655..7cdec0d2c 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py +++ b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py @@ -1,3 +1,5 @@ +import json +import os from importlib import import_module from pathlib import Path from typing import Optional @@ -7,6 +9,7 @@ from pydantic import DirectoryPath, FilePath from referencing import Registry, Resource +from ..data_transfers import automatic_dandi_upload from ...nwbconverter import NWBConverter from ...utils import dict_deep_update, load_dict_from_file @@ -50,7 +53,7 @@ def run_conversion_from_yaml( data_folder_path: Optional[DirectoryPath] = None, output_folder_path: Optional[DirectoryPath] = None, overwrite: bool = False, -): +) -> None: """ Run conversion to NWB given a yaml specification file. @@ -100,6 +103,14 @@ def run_conversion_from_yaml( registry=registry, ) + upload_to_dandiset = "upload_to_dandiset" in specification + if upload_to_dandiset and "DANDI_API_KEY" not in os.environ: + message = ( + "The 'upload_to_dandiset' prompt was found in the YAML specification, " + "but the environment variable 'DANDI_API_KEY' was not set." 
+ ) + raise ValueError(message) + global_metadata = specification.get("metadata", dict()) global_conversion_options = specification.get("conversion_options", dict()) data_interfaces_spec = specification.get("data_interfaces") @@ -115,6 +126,7 @@ def run_conversion_from_yaml( experiment_metadata = experiment.get("metadata", dict()) for session in experiment["sessions"]: file_counter += 1 + source_data = session["source_data"] for interface_name, interface_source_data in session["source_data"].items(): for key, value in interface_source_data.items(): @@ -122,21 +134,47 @@ def run_conversion_from_yaml( source_data[interface_name].update({key: [str(Path(data_folder_path) / x) for x in value]}) elif key in ("file_path", "folder_path"): source_data[interface_name].update({key: str(Path(data_folder_path) / value)}) + converter = CustomNWBConverter(source_data=source_data) + metadata = converter.get_metadata() for metadata_source in [global_metadata, experiment_metadata, session.get("metadata", dict())]: metadata = dict_deep_update(metadata, metadata_source) - nwbfile_name = session.get("nwbfile_name", f"temp_nwbfile_name_{file_counter}").strip(".nwb") + + session_id = session.get("metadata", dict()).get("NWBFile", dict()).get("session_id", None) + if upload_to_dandiset and session_id is None: + message = ( + "The 'upload_to_dandiset' prompt was found in the YAML specification, " + "but the 'session_id' was not found for session with info block: " + f"\n\n {json.dumps(obj=session, indent=2)}\n\n" + "File intended for DANDI upload must include a session ID." + ) + raise ValueError(message) + session_conversion_options = session.get("conversion_options", dict()) conversion_options = dict() for key in converter.data_interface_objects: conversion_options[key] = dict(session_conversion_options.get(key, dict()), **global_conversion_options) + + nwbfile_name = session.get("nwbfile_name", f"temp_nwbfile_name_{file_counter}").strip(".nwb") converter.run_conversion( nwbfile_path=output_folder_path / f"{nwbfile_name}.nwb", metadata=metadata, overwrite=overwrite, conversion_options=conversion_options, ) + + if upload_to_dandiset: + dandiset_id = specification["upload_to_dandiset"] + staging = int(dandiset_id) >= 200_000 + automatic_dandi_upload( + dandiset_id=dandiset_id, + nwb_folder_path=output_folder_path, + staging=staging, + ) + + return None # We can early return since organization below will occur within the upload step + # To properly mimic a true dandi organization, the full directory must be populated with NWBFiles. 
all_nwbfile_paths = [nwbfile_path for nwbfile_path in output_folder_path.iterdir() if nwbfile_path.suffix == ".nwb"] nwbfile_paths_to_set = [ diff --git a/tests/imports.py b/tests/imports.py index 5f8b65e72..7ac95713b 100644 --- a/tests/imports.py +++ b/tests/imports.py @@ -68,6 +68,7 @@ def test_tools(self): "get_package_version", "is_package_installed", "deploy_process", + "data_transfers", "LocalPathExpander", "get_module", ] diff --git a/tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml b/tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml new file mode 100644 index 000000000..adf590d3a --- /dev/null +++ b/tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml @@ -0,0 +1,66 @@ +metadata: + NWBFile: + lab: My Lab + institution: My Institution + +conversion_options: + stub_test: True + +data_interfaces: + ap: SpikeGLXRecordingInterface + lf: SpikeGLXRecordingInterface + phy: PhySortingInterface + +upload_to_dandiset: "200560" + +experiments: + ymaze: + metadata: + NWBFile: + session_description: Subject navigating a Y-shaped maze. + + sessions: + - nwbfile_name: example_converter_spec_1 + source_data: + ap: + file_path: spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.ap.bin + metadata: + NWBFile: + session_start_time: "2020-10-09T21:19:09+00:00" + session_id: "test-yaml-1" + Subject: + subject_id: "yaml-1" + sex: F + age: P35D + species: Mus musculus + - nwbfile_name: example_converter_spec_2.nwb + metadata: + NWBFile: + session_start_time: "2020-10-10T21:19:09+00:00" + session_id: "test-yaml-2" + Subject: + subject_id: "yaml-002" + sex: F + age: P35D + species: Mus musculus + source_data: + lf: + file_path: spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.lf.bin + + open_explore: + sessions: + - nwbfile_name: example_converter_spec_3 + source_data: + lf: + file_path: spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.lf.bin + phy: + folder_path: phy/phy_example_0/ + metadata: + NWBFile: + session_start_time: "2020-10-11T21:19:09+00:00" + session_id: test YAML 3 + Subject: + subject_id: YAML Subject Name + sex: F + age: P35D + species: Mus musculus diff --git a/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py b/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py index e46e25352..5a623d141 100644 --- a/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py +++ b/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py @@ -19,6 +19,7 @@ "fname", [ "GIN_conversion_specification.yml", + "GIN_conversion_specification_dandi_upload.yml", "GIN_conversion_specification_missing_nwbfile_names.yml", "GIN_conversion_specification_no_nwbfile_name_or_other_metadata.yml", "GIN_conversion_specification_videos.yml", diff --git a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py new file mode 100644 index 000000000..c36d072e7 --- /dev/null +++ b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py @@ -0,0 +1,53 @@ +import os +import platform +import time +from datetime import datetime, timedelta +from pathlib import Path + +import dandi.dandiapi +import pytest +from packaging.version import Version + +from neuroconv import run_conversion_from_yaml + +from ..setup_paths import ECEPHY_DATA_PATH, OUTPUT_PATH + +DANDI_API_KEY = os.getenv("DANDI_API_KEY") +HAVE_DANDI_KEY = DANDI_API_KEY is not None and 
DANDI_API_KEY != "" # can be "" from external forks +_PYTHON_VERSION = platform.python_version() + + +@pytest.mark.skipif( + not HAVE_DANDI_KEY or Version(".".join(_PYTHON_VERSION.split(".")[:2])) != Version("3.12"), + reason="You must set your DANDI_API_KEY to run this test!", +) +def test_run_conversion_from_yaml_with_dandi_upload(): + path_to_test_yml_files = Path(__file__).parent / "conversion_specifications" + yaml_file_path = path_to_test_yml_files / "GIN_conversion_specification_dandi_upload.yml" + run_conversion_from_yaml( + specification_file_path=yaml_file_path, + data_folder_path=ECEPHY_DATA_PATH, + output_folder_path=OUTPUT_PATH, + overwrite=True, + ) + + time.sleep(60) # Give some buffer room for server to process before making assertions against DANDI API + + client = dandi.dandiapi.DandiAPIClient(api_url="https://api-staging.dandiarchive.org/api") + dandiset = client.get_dandiset("200560") + + expected_asset_paths = [ + "sub-yaml-1/sub-yaml-1_ses-test-yaml-1_ecephys.nwb", + "sub-yaml-002/sub-yaml-002_ses-test-yaml-2_ecephys.nwb", + "sub-YAML-Subject-Name/sub-YAML-Subject-Name_ses-test-YAML-3_ecephys.nwb", + ] + for asset_path in expected_asset_paths: + test_asset = dandiset.get_asset_by_path(path=asset_path) # Will error if not found + test_asset_metadata = test_asset.get_raw_metadata() + + # Past uploads may have created the same apparent file, so look at the modification time to ensure + # this test is actually testing the most recent upload + date_modified = datetime.fromisoformat( + test_asset_metadata["dateModified"].split("Z")[0] # Timezones look a little messy + ) + assert datetime.now() - date_modified < timedelta(minutes=10) From 737468c63f17f206488cbba57484e6c9df200ff5 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 10 Dec 2024 09:38:59 -0600 Subject: [PATCH 27/28] Support arbitrary multi probe and multi trigger structures in spikeglx (#1150) --- CHANGELOG.md | 2 + .../ecephys/spikeglx/spikeglxconverter.py | 31 ++++------ .../ecephys/spikeglx/spikeglxdatainterface.py | 56 +++++++++++++------ .../ecephys/spikeglx/spikeglxnidqinterface.py | 27 ++++++--- .../spikeglx_single_probe_metadata.json | 12 ++-- tests/test_on_data/ecephys/test_lfp.py | 4 +- .../ecephys/test_recording_interfaces.py | 4 +- .../ecephys/test_spikeglx_converter.py | 31 +++++----- 8 files changed, 95 insertions(+), 72 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae105b907..65d3515f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,11 +7,13 @@ ## Bug Fixes * datetime objects now can be validated as conversion options [#1139](https://github.com/catalystneuro/neuroconv/pull/1126) * Fix a bug where data in `DeepLabCutInterface` failed to write when `ndx-pose` was not imported. [#1144](https://github.com/catalystneuro/neuroconv/pull/1144) +* `SpikeGLXConverterPipe` converter now accepts multi-probe structures with multi-trigger and does not assume a specific folder structure [#1150](https://github.com/catalystneuro/neuroconv/pull/1150) ## Features * Propagate the `unit_electrode_indices` argument from the spikeinterface tools to `BaseSortingExtractorInterface`. 
This allows users to map units to the electrode table when adding sorting data [PR #1124](https://github.com/catalystneuro/neuroconv/pull/1124) * Imaging interfaces have a new conversion option `always_write_timestamps` that can be used to force writing timestamps even if neuroconv's heuristics indicates regular sampling rate [PR #1125](https://github.com/catalystneuro/neuroconv/pull/1125) * Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140) +* `SpikeGLXRecordingInterface` now also accepts `folder_path` making its behavior equivalent to SpikeInterface [#1150](https://github.com/catalystneuro/neuroconv/pull/1150) * Added the `rclone_transfer_batch_job` helper function for executing Rclone data transfers in AWS Batch jobs. [PR #1085](https://github.com/catalystneuro/neuroconv/pull/1085) * Added the `deploy_neuroconv_batch_job` helper function for deploying NeuroConv AWS Batch jobs. [PR #1086](https://github.com/catalystneuro/neuroconv/pull/1086) * YAML specification files now accept an outer keyword `upload_to_dandiset="< six-digit ID >"` to automatically upload the produced NWB files to the DANDI archive [PR #1089](https://github.com/catalystneuro/neuroconv/pull/1089) diff --git a/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxconverter.py b/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxconverter.py index 007c3177c..029955d24 100644 --- a/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxconverter.py +++ b/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxconverter.py @@ -29,8 +29,10 @@ def get_source_schema(cls): @classmethod def get_streams(cls, folder_path: DirectoryPath) -> list[str]: + "Return the stream ids available in the folder." from spikeinterface.extractors import SpikeGLXRecordingExtractor + # The first entry is the stream ids the second is the stream names return SpikeGLXRecordingExtractor.get_streams(folder_path=folder_path)[0] @validate_call @@ -61,28 +63,17 @@ def __init__( """ folder_path = Path(folder_path) - streams = streams or self.get_streams(folder_path=folder_path) + streams_ids = streams or self.get_streams(folder_path=folder_path) data_interfaces = dict() - for stream in streams: - if "ap" in stream: - probe_name = stream[:5] - file_path = ( - folder_path / f"{folder_path.stem}_{probe_name}" / f"{folder_path.stem}_t0.{probe_name}.ap.bin" - ) - es_key = f"ElectricalSeriesAP{probe_name.capitalize()}" - interface = SpikeGLXRecordingInterface(file_path=file_path, es_key=es_key) - elif "lf" in stream: - probe_name = stream[:5] - file_path = ( - folder_path / f"{folder_path.stem}_{probe_name}" / f"{folder_path.stem}_t0.{probe_name}.lf.bin" - ) - es_key = f"ElectricalSeriesLF{probe_name.capitalize()}" - interface = SpikeGLXRecordingInterface(file_path=file_path, es_key=es_key) - elif "nidq" in stream: - file_path = folder_path / f"{folder_path.stem}_t0.nidq.bin" - interface = SpikeGLXNIDQInterface(file_path=file_path) - data_interfaces.update({str(stream): interface}) # Without str() casting, is a numpy string + + nidq_streams = [stream_id for stream_id in streams_ids if stream_id == "nidq"] + electrical_streams = [stream_id for stream_id in streams_ids if stream_id not in nidq_streams] + for stream_id in electrical_streams: + data_interfaces[stream_id] = SpikeGLXRecordingInterface(folder_path=folder_path, stream_id=stream_id) + + for stream_id in nidq_streams: + data_interfaces[stream_id] = SpikeGLXNIDQInterface(folder_path=folder_path) super().__init__(data_interfaces=data_interfaces, 
verbose=verbose) diff --git a/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxdatainterface.py b/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxdatainterface.py index c15516431..00419e036 100644 --- a/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxdatainterface.py +++ b/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxdatainterface.py @@ -4,7 +4,7 @@ from typing import Optional import numpy as np -from pydantic import FilePath, validate_call +from pydantic import DirectoryPath, FilePath, validate_call from .spikeglx_utils import ( add_recording_extractor_properties, @@ -45,7 +45,6 @@ def get_source_schema(cls) -> dict: def _source_data_to_extractor_kwargs(self, source_data: dict) -> dict: extractor_kwargs = source_data.copy() - extractor_kwargs.pop("file_path") extractor_kwargs["folder_path"] = self.folder_path extractor_kwargs["all_annotations"] = True extractor_kwargs["stream_id"] = self.stream_id @@ -54,38 +53,59 @@ def _source_data_to_extractor_kwargs(self, source_data: dict) -> dict: @validate_call def __init__( self, - file_path: FilePath, + file_path: Optional[FilePath] = None, verbose: bool = True, es_key: Optional[str] = None, + folder_path: Optional[DirectoryPath] = None, + stream_id: Optional[str] = None, ): """ Parameters ---------- + folder_path: DirectoryPath + Folder path containing the binary files of the SpikeGLX recording. + stream_id: str, optional + Stream ID of the SpikeGLX recording. + Examples are 'nidq', 'imec0.ap', 'imec0.lf', 'imec1.ap', 'imec1.lf', etc. file_path : FilePathType Path to .bin file. Point to .ap.bin for SpikeGLXRecordingInterface and .lf.bin for SpikeGLXLFPInterface. verbose : bool, default: True Whether to output verbose text. - es_key : str, default: "ElectricalSeries" + es_key : str, the key to access the metadata of the ElectricalSeries. """ - self.stream_id = fetch_stream_id_for_spikelgx_file(file_path) - if es_key is None: - if "lf" in self.stream_id: - es_key = "ElectricalSeriesLF" - elif "ap" in self.stream_id: - es_key = "ElectricalSeriesAP" - else: - raise ValueError("Cannot automatically determine es_key from path") - file_path = Path(file_path) - self.folder_path = file_path.parent + if file_path is not None and stream_id is None: + self.stream_id = fetch_stream_id_for_spikelgx_file(file_path) + self.folder_path = Path(file_path).parent + + else: + self.stream_id = stream_id + self.folder_path = Path(folder_path) super().__init__( - file_path=file_path, + folder_path=folder_path, verbose=verbose, es_key=es_key, ) - self.source_data["file_path"] = str(file_path) - self.meta = self.recording_extractor.neo_reader.signals_info_dict[(0, self.stream_id)]["meta"] + + signal_info_key = (0, self.stream_id) # Key format is (segment_index, stream_id) + self._signals_info_dict = self.recording_extractor.neo_reader.signals_info_dict[signal_info_key] + self.meta = self._signals_info_dict["meta"] + + if es_key is None: + stream_kind = self._signals_info_dict["stream_kind"] # ap or lf + stream_kind_caps = stream_kind.upper() + device = self._signals_info_dict["device"].capitalize() # imec0, imec1, etc. 
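            # Resulting defaults: a single enabled probe yields "ElectricalSeriesAP" or "ElectricalSeriesLF";
            # when several probes are enabled, the device suffix is appended below, e.g. "ElectricalSeriesAPImec0"
            # for the first probe.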
+ + electrical_series_name = f"ElectricalSeries{stream_kind_caps}" + + # Add imec{probe_index} to the electrical series name when there are multiple probes + # or undefined, `typeImEnabled` is present in the meta of all the production probes + self.probes_enabled_in_run = int(self.meta.get("typeImEnabled", 0)) + if self.probes_enabled_in_run != 1: + electrical_series_name += f"{device}" + + self.es_key = electrical_series_name # Set electrodes properties add_recording_extractor_properties(self.recording_extractor) @@ -100,7 +120,7 @@ def get_metadata(self) -> dict: device = get_device_metadata(self.meta) # Should follow pattern 'Imec0', 'Imec1', etc. - probe_name = self.stream_id[:5].capitalize() + probe_name = self._signals_info_dict["device"].capitalize() device["name"] = f"Neuropixel{probe_name}" # Add groups metadata diff --git a/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxnidqinterface.py b/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxnidqinterface.py index 3cf50080a..1d7079716 100644 --- a/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxnidqinterface.py +++ b/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxnidqinterface.py @@ -1,7 +1,8 @@ from pathlib import Path +from typing import Optional import numpy as np -from pydantic import ConfigDict, FilePath, validate_call +from pydantic import ConfigDict, DirectoryPath, FilePath, validate_call from .spikeglx_utils import get_session_start_time from ..baserecordingextractorinterface import BaseRecordingExtractorInterface @@ -29,7 +30,6 @@ def get_source_schema(cls) -> dict: def _source_data_to_extractor_kwargs(self, source_data: dict) -> dict: extractor_kwargs = source_data.copy() - extractor_kwargs.pop("file_path") extractor_kwargs["folder_path"] = self.folder_path extractor_kwargs["stream_id"] = self.stream_id return extractor_kwargs @@ -37,10 +37,11 @@ def _source_data_to_extractor_kwargs(self, source_data: dict) -> dict: @validate_call(config=ConfigDict(arbitrary_types_allowed=True)) def __init__( self, - file_path: FilePath, + file_path: Optional[FilePath] = None, verbose: bool = True, load_sync_channel: bool = False, es_key: str = "ElectricalSeriesNIDQ", + folder_path: Optional[DirectoryPath] = None, ): """ Read channel data from the NIDQ board for the SpikeGLX recording. @@ -49,6 +50,8 @@ def __init__( Parameters ---------- + folder_path : DirectoryPath + Path to the folder containing the .nidq.bin file. file_path : FilePathType Path to .nidq.bin file. 
verbose : bool, default: True @@ -59,10 +62,17 @@ def __init__( es_key : str, default: "ElectricalSeriesNIDQ" """ - self.file_path = Path(file_path) - self.folder_path = self.file_path.parent + if file_path is None and folder_path is None: + raise ValueError("Either 'file_path' or 'folder_path' must be provided.") + + if file_path is not None: + file_path = Path(file_path) + self.folder_path = file_path.parent + + if folder_path is not None: + self.folder_path = Path(folder_path) + super().__init__( - file_path=self.file_path, verbose=verbose, load_sync_channel=load_sync_channel, es_key=es_key, @@ -72,7 +82,10 @@ def __init__( self.recording_extractor.set_property( key="group_name", values=["NIDQChannelGroup"] * self.recording_extractor.get_num_channels() ) - self.meta = self.recording_extractor.neo_reader.signals_info_dict[(0, "nidq")]["meta"] + + signal_info_key = (0, self.stream_id) # Key format is (segment_index, stream_id) + self._signals_info_dict = self.recording_extractor.neo_reader.signals_info_dict[signal_info_key] + self.meta = self._signals_info_dict["meta"] def get_metadata(self) -> dict: metadata = super().get_metadata() diff --git a/tests/test_on_data/ecephys/spikeglx_single_probe_metadata.json b/tests/test_on_data/ecephys/spikeglx_single_probe_metadata.json index 637124cd0..20f11742b 100644 --- a/tests/test_on_data/ecephys/spikeglx_single_probe_metadata.json +++ b/tests/test_on_data/ecephys/spikeglx_single_probe_metadata.json @@ -29,9 +29,9 @@ "device": "NIDQBoard" } ], - "ElectricalSeriesAPImec0": { - "name": "ElectricalSeriesAPImec0", - "description": "Acquisition traces for the ElectricalSeriesAPImec0." + "ElectricalSeriesAP": { + "name": "ElectricalSeriesAP", + "description": "Acquisition traces for the ElectricalSeriesAP." }, "Electrodes": [ { @@ -51,9 +51,9 @@ "name": "ElectricalSeriesNIDQ", "description": "Raw acquisition traces from the NIDQ (.nidq.bin) channels." }, - "ElectricalSeriesLFImec0": { - "name": "ElectricalSeriesLFImec0", - "description": "Acquisition traces for the ElectricalSeriesLFImec0." + "ElectricalSeriesLF": { + "name": "ElectricalSeriesLF", + "description": "Acquisition traces for the ElectricalSeriesLF." 
} } } diff --git a/tests/test_on_data/ecephys/test_lfp.py b/tests/test_on_data/ecephys/test_lfp.py index c46f8d297..010516d86 100644 --- a/tests/test_on_data/ecephys/test_lfp.py +++ b/tests/test_on_data/ecephys/test_lfp.py @@ -57,9 +57,7 @@ class TestEcephysLFPNwbConversions(unittest.TestCase): param( data_interface=SpikeGLXRecordingInterface, interface_kwargs=dict( - file_path=( - DATA_PATH / "spikeglx" / "Noise4Sam_g0" / "Noise4Sam_g0_imec0" / "Noise4Sam_g0_t0.imec0.lf.bin" - ) + folder_path=DATA_PATH / "spikeglx" / "Noise4Sam_g0" / "Noise4Sam_g0_imec0", stream_id="imec0.lf" ), expected_write_module="raw", ), diff --git a/tests/test_on_data/ecephys/test_recording_interfaces.py b/tests/test_on_data/ecephys/test_recording_interfaces.py index 7677ded22..0520b5b42 100644 --- a/tests/test_on_data/ecephys/test_recording_interfaces.py +++ b/tests/test_on_data/ecephys/test_recording_interfaces.py @@ -641,9 +641,7 @@ def test_extracted_metadata(self, setup_interface): class TestSpikeGLXRecordingInterface(RecordingExtractorInterfaceTestMixin): data_interface_cls = SpikeGLXRecordingInterface interface_kwargs = dict( - file_path=str( - ECEPHY_DATA_PATH / "spikeglx" / "Noise4Sam_g0" / "Noise4Sam_g0_imec0" / "Noise4Sam_g0_t0.imec0.ap.bin" - ) + folder_path=ECEPHY_DATA_PATH / "spikeglx" / "Noise4Sam_g0" / "Noise4Sam_g0_imec0", stream_id="imec0.ap" ) save_directory = OUTPUT_PATH diff --git a/tests/test_on_data/ecephys/test_spikeglx_converter.py b/tests/test_on_data/ecephys/test_spikeglx_converter.py index af98789c1..970a815af 100644 --- a/tests/test_on_data/ecephys/test_spikeglx_converter.py +++ b/tests/test_on_data/ecephys/test_spikeglx_converter.py @@ -33,10 +33,10 @@ def assertNWBFileStructure(self, nwbfile_path: FilePath, expected_session_start_ with NWBHDF5IO(path=nwbfile_path) as io: nwbfile = io.read() - assert nwbfile.session_start_time == expected_session_start_time + assert nwbfile.session_start_time.replace(tzinfo=None) == expected_session_start_time - assert "ElectricalSeriesAPImec0" in nwbfile.acquisition - assert "ElectricalSeriesLFImec0" in nwbfile.acquisition + assert "ElectricalSeriesAP" in nwbfile.acquisition + assert "ElectricalSeriesLF" in nwbfile.acquisition assert "ElectricalSeriesNIDQ" in nwbfile.acquisition assert len(nwbfile.acquisition) == 3 @@ -74,7 +74,7 @@ def test_single_probe_spikeglx_converter(self): nwbfile_path = self.tmpdir / "test_single_probe_spikeglx_converter.nwb" converter.run_conversion(nwbfile_path=nwbfile_path, metadata=metadata) - expected_session_start_time = datetime(2020, 11, 3, 10, 35, 10).astimezone() + expected_session_start_time = datetime(2020, 11, 3, 10, 35, 10) self.assertNWBFileStructure(nwbfile_path=nwbfile_path, expected_session_start_time=expected_session_start_time) def test_in_converter_pipe(self): @@ -84,7 +84,7 @@ def test_in_converter_pipe(self): nwbfile_path = self.tmpdir / "test_spikeglx_converter_in_converter_pipe.nwb" converter_pipe.run_conversion(nwbfile_path=nwbfile_path) - expected_session_start_time = datetime(2020, 11, 3, 10, 35, 10).astimezone() + expected_session_start_time = datetime(2020, 11, 3, 10, 35, 10) self.assertNWBFileStructure(nwbfile_path=nwbfile_path, expected_session_start_time=expected_session_start_time) def test_in_nwbconverter(self): @@ -101,7 +101,7 @@ class TestConverter(NWBConverter): nwbfile_path = self.tmpdir / "test_spikeglx_converter_in_nwbconverter.nwb" converter.run_conversion(nwbfile_path=nwbfile_path) - expected_session_start_time = datetime(2020, 11, 3, 10, 35, 10).astimezone() + 
expected_session_start_time = datetime(2020, 11, 3, 10, 35, 10) self.assertNWBFileStructure(nwbfile_path=nwbfile_path, expected_session_start_time=expected_session_start_time) @@ -118,7 +118,9 @@ def assertNWBFileStructure(self, nwbfile_path: FilePath, expected_session_start_ with NWBHDF5IO(path=nwbfile_path) as io: nwbfile = io.read() - assert nwbfile.session_start_time == expected_session_start_time + # Do the comparison without timezone information to avoid CI timezone issues + # The timezone is set by pynbw automatically + assert nwbfile.session_start_time.replace(tzinfo=None) == expected_session_start_time # TODO: improve name of segments using 'Segment{index}' for clarity assert "ElectricalSeriesAPImec00" in nwbfile.acquisition @@ -129,7 +131,7 @@ def assertNWBFileStructure(self, nwbfile_path: FilePath, expected_session_start_ assert "ElectricalSeriesLFImec01" in nwbfile.acquisition assert "ElectricalSeriesLFImec10" in nwbfile.acquisition assert "ElectricalSeriesLFImec11" in nwbfile.acquisition - assert len(nwbfile.acquisition) == 8 + assert len(nwbfile.acquisition) == 16 assert "NeuropixelImec0" in nwbfile.devices assert "NeuropixelImec1" in nwbfile.devices @@ -141,7 +143,7 @@ def assertNWBFileStructure(self, nwbfile_path: FilePath, expected_session_start_ def test_multi_probe_spikeglx_converter(self): converter = SpikeGLXConverterPipe( - folder_path=SPIKEGLX_PATH / "multi_trigger_multi_gate" / "SpikeGLX" / "5-19-2022-CI0" / "5-19-2022-CI0_g0" + folder_path=SPIKEGLX_PATH / "multi_trigger_multi_gate" / "SpikeGLX" / "5-19-2022-CI0" ) metadata = converter.get_metadata() @@ -161,13 +163,12 @@ def test_multi_probe_spikeglx_converter(self): expected_device_metadata = expected_ecephys_metadata.pop("Device") assert device_metadata == expected_device_metadata - assert test_ecephys_metadata == expected_ecephys_metadata nwbfile_path = self.tmpdir / "test_multi_probe_spikeglx_converter.nwb" converter.run_conversion(nwbfile_path=nwbfile_path, metadata=metadata) - expected_session_start_time = datetime(2022, 5, 19, 17, 37, 47).astimezone() + expected_session_start_time = datetime(2022, 5, 19, 17, 37, 47) self.assertNWBFileStructure(nwbfile_path=nwbfile_path, expected_session_start_time=expected_session_start_time) @@ -193,7 +194,7 @@ def test_electrode_table_writing(tmp_path): np.testing.assert_array_equal(saved_channel_names, expected_channel_names_nidq) # Test AP - electrical_series = nwbfile.acquisition["ElectricalSeriesAPImec0"] + electrical_series = nwbfile.acquisition["ElectricalSeriesAP"] ap_electrodes_table_region = electrical_series.electrodes region_indices = ap_electrodes_table_region.data recording_extractor = converter.data_interface_objects["imec0.ap"].recording_extractor @@ -203,7 +204,7 @@ def test_electrode_table_writing(tmp_path): np.testing.assert_array_equal(saved_channel_names, expected_channel_names_ap) # Test LF - electrical_series = nwbfile.acquisition["ElectricalSeriesLFImec0"] + electrical_series = nwbfile.acquisition["ElectricalSeriesLF"] lf_electrodes_table_region = electrical_series.electrodes region_indices = lf_electrodes_table_region.data recording_extractor = converter.data_interface_objects["imec0.lf"].recording_extractor @@ -222,7 +223,7 @@ def test_electrode_table_writing(tmp_path): # Test round trip with spikeinterface recording_extractor_ap = NwbRecordingExtractor( file_path=nwbfile_path, - electrical_series_name="ElectricalSeriesAPImec0", + electrical_series_name="ElectricalSeriesAP", ) channel_ids = recording_extractor_ap.get_channel_ids() @@ -230,7 +231,7 

From 1032e151d9515413d64f48c315c03cc3a84ffac8 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin
Date: Tue, 10 Dec 2024 12:21:02 -0600
Subject: [PATCH 28/28] restore public status of `NWBMetaDataEncoder` encoder (#1142)

---
 CHANGELOG.md                                  |  1 +
 docs/api/utils.rst                            |  2 +
 src/neuroconv/basedatainterface.py            |  3 +-
 .../neuralynx/neuralynx_nvt_interface.py      |  3 +-
 .../nwb_helpers/_metadata_and_file_helpers.py |  5 +--
 .../tools/testing/data_interface_mixins.py    |  2 +-
 src/neuroconv/utils/__init__.py               |  2 +-
 src/neuroconv/utils/json_schema.py            | 44 +++++++------
 .../test_tools/test_expand_paths.py           |  2 +-
 .../test_utils/test_json_schema_utils.py      |  2 +-
 10 files changed, 28 insertions(+), 38 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 65d3515f1..728123253 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@

 ## Bug Fixes
 * datetime objects now can be validated as conversion options [#1139](https://github.com/catalystneuro/neuroconv/pull/1126)
+* Make `NWBMetaDataEncoder` public again [PR #1142](https://github.com/catalystneuro/neuroconv/pull/1142)
 * Fix a bug where data in `DeepLabCutInterface` failed to write when `ndx-pose` was not imported. [#1144](https://github.com/catalystneuro/neuroconv/pull/1144)
 * `SpikeGLXConverterPipe` converter now accepts multi-probe structures with multi-trigger and does not assume a specific folder structure [#1150](https://github.com/catalystneuro/neuroconv/pull/1150)

diff --git a/docs/api/utils.rst b/docs/api/utils.rst
index 4f19f7cee..c9b85b14c 100644
--- a/docs/api/utils.rst
+++ b/docs/api/utils.rst
@@ -8,6 +8,8 @@ Dictionaries
 JSON Schema
 -----------
 .. automodule:: neuroconv.utils.json_schema
+    :members:
+    :exclude-members: NWBMetaDataEncoder

 Common Reused Types
 -------------------

diff --git a/src/neuroconv/basedatainterface.py b/src/neuroconv/basedatainterface.py
index 272abbd0c..530eec60c 100644
--- a/src/neuroconv/basedatainterface.py
+++ b/src/neuroconv/basedatainterface.py
@@ -19,12 +19,11 @@
 )
 from .tools.nwb_helpers._metadata_and_file_helpers import _resolve_backend
 from .utils import (
-    _NWBMetaDataEncoder,
     get_json_schema_from_method_signature,
     load_dict_from_file,
 )
 from .utils.dict import DeepDict
-from .utils.json_schema import _NWBSourceDataEncoder
+from .utils.json_schema import _NWBMetaDataEncoder, _NWBSourceDataEncoder


 class BaseDataInterface(ABC):

diff --git a/src/neuroconv/datainterfaces/behavior/neuralynx/neuralynx_nvt_interface.py b/src/neuroconv/datainterfaces/behavior/neuralynx/neuralynx_nvt_interface.py
index e161387f0..213adf731 100644
--- a/src/neuroconv/datainterfaces/behavior/neuralynx/neuralynx_nvt_interface.py
+++ b/src/neuroconv/datainterfaces/behavior/neuralynx/neuralynx_nvt_interface.py
@@ -8,7 +8,8 @@

 from .nvt_utils import read_data, read_header
 from ....basetemporalalignmentinterface import BaseTemporalAlignmentInterface
-from ....utils import DeepDict, _NWBMetaDataEncoder, get_base_schema
+from ....utils import DeepDict, get_base_schema
+from ....utils.json_schema import _NWBMetaDataEncoder
 from ....utils.path import infer_path

diff --git a/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py b/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py
index c3aaea48d..355c86510 100644
--- a/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py
+++ b/src/neuroconv/tools/nwb_helpers/_metadata_and_file_helpers.py
@@ -8,7 +8,6 @@
 from datetime import datetime
 from pathlib import Path
 from typing import Literal, Optional
-from warnings import warn

 from hdmf_zarr import NWBZarrIO
 from pydantic import FilePath
@@ -26,7 +25,7 @@ def get_module(nwbfile: NWBFile, name: str, description: str = None):
     """Check if processing module exists. If not, create it. Then return module."""
     if name in nwbfile.processing:
         if description is not None and nwbfile.processing[name].description != description:
-            warn(
+            warnings.warn(
                 "Custom description given to get_module does not match existing module description! "
                 "Ignoring custom description."
             )
@@ -157,7 +156,7 @@ def _attempt_cleanup_of_existing_nwbfile(nwbfile_path: Path) -> None:
     # Windows in particular can encounter errors at this step
     except PermissionError:  # pragma: no cover
         message = f"Unable to remove NWB file located at {nwbfile_path.absolute()}! Please remove it manually."
-        warn(message=message, stacklevel=2)
+        warnings.warn(message=message, stacklevel=2)


 @contextmanager

diff --git a/src/neuroconv/tools/testing/data_interface_mixins.py b/src/neuroconv/tools/testing/data_interface_mixins.py
index fab049165..dc45cec53 100644
--- a/src/neuroconv/tools/testing/data_interface_mixins.py
+++ b/src/neuroconv/tools/testing/data_interface_mixins.py
@@ -33,7 +33,7 @@
     configure_backend,
     get_default_backend_configuration,
 )
-from neuroconv.utils import _NWBMetaDataEncoder
+from neuroconv.utils.json_schema import _NWBMetaDataEncoder


 class DataInterfaceTestMixin:

diff --git a/src/neuroconv/utils/__init__.py b/src/neuroconv/utils/__init__.py
index c0061a983..f7163f3ff 100644
--- a/src/neuroconv/utils/__init__.py
+++ b/src/neuroconv/utils/__init__.py
@@ -7,7 +7,7 @@
     load_dict_from_file,
 )
 from .json_schema import (
-    _NWBMetaDataEncoder,
+    NWBMetaDataEncoder,
     fill_defaults,
     get_base_schema,
     get_metadata_schema_for_icephys,

diff --git a/src/neuroconv/utils/json_schema.py b/src/neuroconv/utils/json_schema.py
index 07dc3321f..6aa7a75d0 100644
--- a/src/neuroconv/utils/json_schema.py
+++ b/src/neuroconv/utils/json_schema.py
@@ -16,13 +16,8 @@
 from pynwb.icephys import IntracellularElectrode


-class _NWBMetaDataEncoder(json.JSONEncoder):
-    """
-    Custom JSON encoder for NWB metadata.
-
-    This encoder extends the default JSONEncoder class and provides custom serialization
-    for certain data types commonly used in NWB metadata.
-    """
+class _GenericNeuroconvEncoder(json.JSONEncoder):
+    """Generic JSON encoder for NeuroConv data."""

     def default(self, obj):
         """
@@ -36,45 +31,38 @@ def default(self, obj):
         if isinstance(obj, np.generic):
             return obj.item()

+        # Numpy arrays should be converted to lists
         if isinstance(obj, np.ndarray):
             return obj.tolist()

+        # Over-write behaviors for Paths
+        if isinstance(obj, Path):
+            return str(obj)
+
         # The base-class handles it
         return super().default(obj)


-class _NWBSourceDataEncoder(_NWBMetaDataEncoder):
+class _NWBMetaDataEncoder(_GenericNeuroconvEncoder):
     """
-    Custom JSON encoder for data interface source data (i.e. kwargs).
-
-    This encoder extends the default JSONEncoder class and provides custom serialization
-    for certain data types commonly used in interface source data.
+    Custom JSON encoder for NWB metadata.
     """

-    def default(self, obj):
-        # Over-write behaviors for Paths
-        if isinstance(obj, Path):
-            return str(obj)
-
-        return super().default(obj)

+class _NWBSourceDataEncoder(_GenericNeuroconvEncoder):
+    """
+    Custom JSON encoder for data interface source data (i.e. kwargs).
+    """

-class _NWBConversionOptionsEncoder(_NWBMetaDataEncoder):
+class _NWBConversionOptionsEncoder(_GenericNeuroconvEncoder):
     """
     Custom JSON encoder for conversion options of the data interfaces and converters (i.e. kwargs).
-
-    This encoder extends the default JSONEncoder class and provides custom serialization
-    for certain data types commonly used in interface source data.
     """

-    def default(self, obj):
-
-        # Over-write behaviors for Paths
-        if isinstance(obj, Path):
-            return str(obj)
-
-        return super().default(obj)
+
+# This is used in the Guide so we will keep it public.
+NWBMetaDataEncoder = _NWBMetaDataEncoder


 def get_base_schema(
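Because this patch re-exports the encoder as `NWBMetaDataEncoder` from `neuroconv.utils`, it can be passed directly to `json.dumps`. The snippet below is a minimal sketch, separate from the patch, assuming neuroconv is installed with this change applied; the metadata dictionary is invented for illustration and only exercises the numpy handling shown in the diff above.

    import json

    import numpy as np

    from neuroconv.utils import NWBMetaDataEncoder

    # Hypothetical metadata fragment containing numpy types that the default
    # json.JSONEncoder would reject.
    metadata = {
        "Ecephys": {
            "sampling_frequency": np.float64(30000.0),
            "channel_ids": np.arange(4),
        }
    }

    # Numpy scalars are serialized as Python scalars and arrays as lists.
    print(json.dumps(metadata, cls=NWBMetaDataEncoder, indent=2))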
diff --git a/tests/test_minimal/test_tools/test_expand_paths.py b/tests/test_minimal/test_tools/test_expand_paths.py
index 9e7f03631..59924f93a 100644
--- a/tests/test_minimal/test_tools/test_expand_paths.py
+++ b/tests/test_minimal/test_tools/test_expand_paths.py
@@ -9,7 +9,7 @@
 from neuroconv.tools import LocalPathExpander
 from neuroconv.tools.path_expansion import construct_path_template
 from neuroconv.tools.testing import generate_path_expander_demo_ibl
-from neuroconv.utils import _NWBMetaDataEncoder
+from neuroconv.utils.json_schema import _NWBMetaDataEncoder


 def create_test_directories_and_files(

diff --git a/tests/test_minimal/test_utils/test_json_schema_utils.py b/tests/test_minimal/test_utils/test_json_schema_utils.py
index 4edf1e724..5ce63ee56 100644
--- a/tests/test_minimal/test_utils/test_json_schema_utils.py
+++ b/tests/test_minimal/test_utils/test_json_schema_utils.py
@@ -6,12 +6,12 @@
 from pynwb.ophys import ImagingPlane, TwoPhotonSeries

 from neuroconv.utils import (
-    _NWBMetaDataEncoder,
     dict_deep_update,
     fill_defaults,
     get_schema_from_hdmf_class,
     load_dict_from_file,
 )
+from neuroconv.utils.json_schema import _NWBMetaDataEncoder


 def compare_dicts(a: dict, b: dict):