From 4ba1e827d373153344266f325b002f4b6dffcaad Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Mon, 9 Dec 2024 08:31:58 -0600 Subject: [PATCH 1/3] Remove soon to be deprecated jsonschema.RefResolver (#1133) --- CHANGELOG.md | 3 ++- pyproject.toml | 3 ++- .../_yaml_conversion_specification.py | 15 +++++++++++---- .../test_yaml_conversion_specification.py | 17 ++++++++++------- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31a0fd7db..c49e995b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,8 @@ # v0.6.6 (Upcoming) ## Deprecations -* Completely removed compression settings from most places [PR #1126](https://github.com/catalystneuro/neuroconv/pull/1126) +* Removed use of `jsonschema.RefResolver` as it will be deprecated from the jsonschema library [PR #1133](https://github.com/catalystneuro/neuroconv/pull/1133) +* Completely removed compression settings from most places[PR #1126](https://github.com/catalystneuro/neuroconv/pull/1126) ## Bug Fixes * datetime objects now can be validated as conversion options [#1139](https://github.com/catalystneuro/neuroconv/pull/1126) diff --git a/pyproject.toml b/pyproject.toml index 5efd432f5..a4f391512 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,8 @@ dependencies = [ "parse>=1.20.0", "click", "docstring-parser", - "packaging" # Issue 903 + "packaging", # Issue 903 + "referencing", ] diff --git a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py index 0e2f05f74..f8a4e8655 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py +++ b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py @@ -1,11 +1,11 @@ -import sys from importlib import import_module from pathlib import Path from typing import Optional import click -from jsonschema import RefResolver, validate +from jsonschema import validate from pydantic import DirectoryPath, FilePath +from referencing import Registry, Resource from ...nwbconverter import NWBConverter from ...utils import dict_deep_update, load_dict_from_file @@ -85,12 +85,19 @@ def run_conversion_from_yaml( specification = load_dict_from_file(file_path=specification_file_path) schema_folder = Path(__file__).parent.parent.parent / "schemas" + + # Load all required schemas specification_schema = load_dict_from_file(file_path=schema_folder / "yaml_conversion_specification_schema.json") - sys_uri_base = "file:/" if sys.platform.startswith("win32") else "file://" + metadata_schema = load_dict_from_file(file_path=schema_folder / "metadata_schema.json") + + # The yaml specification references the metadata schema, so we need to load it into the registry + registry = Registry().with_resource("metadata_schema.json", Resource.from_contents(metadata_schema)) + + # Validate using the registry validate( instance=specification, schema=specification_schema, - resolver=RefResolver(base_uri=sys_uri_base + str(schema_folder) + "/", referrer=specification_schema), + registry=registry, ) global_metadata = specification.get("metadata", dict()) diff --git a/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py b/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py index 61c71cf86..e46e25352 100644 --- a/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py +++ b/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py @@ -1,12 +1,12 @@ 
-import sys import unittest from datetime import datetime from pathlib import Path import pytest from hdmf.testing import TestCase -from jsonschema import RefResolver, validate +from jsonschema import validate from pynwb import NWBHDF5IO +from referencing import Registry, Resource from neuroconv import run_conversion_from_yaml from neuroconv.utils import load_dict_from_file @@ -27,16 +27,19 @@ def test_validate_example_specifications(fname): path_to_test_yml_files = Path(__file__).parent / "conversion_specifications" schema_folder = path_to_test_yml_files.parent.parent.parent.parent / "src" / "neuroconv" / "schemas" + + # Load schemas specification_schema = load_dict_from_file(file_path=schema_folder / "yaml_conversion_specification_schema.json") - sys_uri_base = "file://" - if sys.platform.startswith("win32"): - sys_uri_base = "file:/" + metadata_schema = load_dict_from_file(file_path=schema_folder / "metadata_schema.json") + + # The yaml specification references the metadata schema, so we need to load it into the registry + registry = Registry().with_resource("metadata_schema.json", Resource.from_contents(metadata_schema)) yaml_file_path = path_to_test_yml_files / fname validate( instance=load_dict_from_file(file_path=yaml_file_path), - schema=load_dict_from_file(file_path=schema_folder / "yaml_conversion_specification_schema.json"), - resolver=RefResolver(base_uri=sys_uri_base + str(schema_folder) + "/", referrer=specification_schema), + schema=specification_schema, + registry=registry, ) From 4b3172c25676eaebe3f3a388e41d4c37d91efd49 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 9 Dec 2024 12:06:45 -0500 Subject: [PATCH 2/3] Add DANDI upload to YAML spec (#1089) Co-authored-by: Heberto Mayorquin --- .github/workflows/deploy-tests.yml | 3 + .github/workflows/live-service-testing.yml | 16 +++++ CHANGELOG.md | 1 + .../yaml_conversion_specification_schema.json | 1 + .../_yaml_conversion_specification.py | 42 +++++++++++- tests/imports.py | 1 + ..._conversion_specification_dandi_upload.yml | 66 +++++++++++++++++++ .../test_yaml_conversion_specification.py | 1 + .../test_yaml/yaml_dandi_transfer_tools.py | 53 +++++++++++++++ 9 files changed, 182 insertions(+), 2 deletions(-) create mode 100644 tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml create mode 100644 tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py diff --git a/.github/workflows/deploy-tests.yml b/.github/workflows/deploy-tests.yml index a18fe8310..a2e56b00a 100644 --- a/.github/workflows/deploy-tests.yml +++ b/.github/workflows/deploy-tests.yml @@ -69,6 +69,9 @@ jobs: if: ${{ needs.assess-file-changes.outputs.SOURCE_CHANGED == 'true' }} uses: ./.github/workflows/live-service-testing.yml secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + S3_GIN_BUCKET: ${{ secrets.S3_GIN_BUCKET }} DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} with: # Ternary operator: condition && value_if_true || value_if_false python-versions: ${{ github.event.pull_request.draft == true && '["3.9"]' || needs.load_python_and_os_versions.outputs.ALL_PYTHON_VERSIONS }} diff --git a/.github/workflows/live-service-testing.yml b/.github/workflows/live-service-testing.yml index 24eda7bc3..155438fb2 100644 --- a/.github/workflows/live-service-testing.yml +++ b/.github/workflows/live-service-testing.yml @@ -13,6 +13,12 @@ on: type: string secrets: + AWS_ACCESS_KEY_ID: + required: true 
+ AWS_SECRET_ACCESS_KEY: + required: true + S3_GIN_BUCKET: + required: true DANDI_API_KEY: required: true @@ -45,7 +51,17 @@ jobs: - name: Install full requirements run: pip install .[test,full] + - name: Prepare data for tests + uses: ./.github/actions/load-data + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + s3-gin-bucket: ${{ secrets.S3_GIN_BUCKET }} + os: ${{ matrix.os }} + - name: Run subset of tests that use DANDI live services run: pytest -rsx -n auto tests/test_minimal/test_tools/dandi_transfer_tools.py + - name: Run subset of tests that use DANDI live services with YAML + run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py - name: Run subset of tests that use Globus live services run: pytest -rsx -n auto tests/test_minimal/test_tools/globus_transfer_tools.py diff --git a/CHANGELOG.md b/CHANGELOG.md index c49e995b9..ae105b907 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ * Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140) * Added the `rclone_transfer_batch_job` helper function for executing Rclone data transfers in AWS Batch jobs. [PR #1085](https://github.com/catalystneuro/neuroconv/pull/1085) * Added the `deploy_neuroconv_batch_job` helper function for deploying NeuroConv AWS Batch jobs. [PR #1086](https://github.com/catalystneuro/neuroconv/pull/1086) +* YAML specification files now accept an outer keyword `upload_to_dandiset="< six-digit ID >"` to automatically upload the produced NWB files to the DANDI archive [PR #1089](https://github.com/catalystneuro/neuroconv/pull/1089) ## Improvements diff --git a/src/neuroconv/schemas/yaml_conversion_specification_schema.json b/src/neuroconv/schemas/yaml_conversion_specification_schema.json index c6526803b..039a1cf48 100644 --- a/src/neuroconv/schemas/yaml_conversion_specification_schema.json +++ b/src/neuroconv/schemas/yaml_conversion_specification_schema.json @@ -8,6 +8,7 @@ "required": ["experiments"], "additionalProperties": false, "properties": { + "upload_to_dandiset": {"type": "string"}, "metadata": {"$ref": "./metadata_schema.json#"}, "conversion_options": {"type": "object"}, "data_interfaces": { diff --git a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py index f8a4e8655..7cdec0d2c 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py +++ b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py @@ -1,3 +1,5 @@ +import json +import os from importlib import import_module from pathlib import Path from typing import Optional @@ -7,6 +9,7 @@ from pydantic import DirectoryPath, FilePath from referencing import Registry, Resource +from ..data_transfers import automatic_dandi_upload from ...nwbconverter import NWBConverter from ...utils import dict_deep_update, load_dict_from_file @@ -50,7 +53,7 @@ def run_conversion_from_yaml( data_folder_path: Optional[DirectoryPath] = None, output_folder_path: Optional[DirectoryPath] = None, overwrite: bool = False, -): +) -> None: """ Run conversion to NWB given a yaml specification file. 
@@ -100,6 +103,14 @@ def run_conversion_from_yaml( registry=registry, ) + upload_to_dandiset = "upload_to_dandiset" in specification + if upload_to_dandiset and "DANDI_API_KEY" not in os.environ: + message = ( + "The 'upload_to_dandiset' prompt was found in the YAML specification, " + "but the environment variable 'DANDI_API_KEY' was not set." + ) + raise ValueError(message) + global_metadata = specification.get("metadata", dict()) global_conversion_options = specification.get("conversion_options", dict()) data_interfaces_spec = specification.get("data_interfaces") @@ -115,6 +126,7 @@ def run_conversion_from_yaml( experiment_metadata = experiment.get("metadata", dict()) for session in experiment["sessions"]: file_counter += 1 + source_data = session["source_data"] for interface_name, interface_source_data in session["source_data"].items(): for key, value in interface_source_data.items(): @@ -122,21 +134,47 @@ def run_conversion_from_yaml( source_data[interface_name].update({key: [str(Path(data_folder_path) / x) for x in value]}) elif key in ("file_path", "folder_path"): source_data[interface_name].update({key: str(Path(data_folder_path) / value)}) + converter = CustomNWBConverter(source_data=source_data) + metadata = converter.get_metadata() for metadata_source in [global_metadata, experiment_metadata, session.get("metadata", dict())]: metadata = dict_deep_update(metadata, metadata_source) - nwbfile_name = session.get("nwbfile_name", f"temp_nwbfile_name_{file_counter}").strip(".nwb") + + session_id = session.get("metadata", dict()).get("NWBFile", dict()).get("session_id", None) + if upload_to_dandiset and session_id is None: + message = ( + "The 'upload_to_dandiset' prompt was found in the YAML specification, " + "but the 'session_id' was not found for session with info block: " + f"\n\n {json.dumps(obj=session, indent=2)}\n\n" + "File intended for DANDI upload must include a session ID." + ) + raise ValueError(message) + session_conversion_options = session.get("conversion_options", dict()) conversion_options = dict() for key in converter.data_interface_objects: conversion_options[key] = dict(session_conversion_options.get(key, dict()), **global_conversion_options) + + nwbfile_name = session.get("nwbfile_name", f"temp_nwbfile_name_{file_counter}").strip(".nwb") converter.run_conversion( nwbfile_path=output_folder_path / f"{nwbfile_name}.nwb", metadata=metadata, overwrite=overwrite, conversion_options=conversion_options, ) + + if upload_to_dandiset: + dandiset_id = specification["upload_to_dandiset"] + staging = int(dandiset_id) >= 200_000 + automatic_dandi_upload( + dandiset_id=dandiset_id, + nwb_folder_path=output_folder_path, + staging=staging, + ) + + return None # We can early return since organization below will occur within the upload step + # To properly mimic a true dandi organization, the full directory must be populated with NWBFiles. 
all_nwbfile_paths = [nwbfile_path for nwbfile_path in output_folder_path.iterdir() if nwbfile_path.suffix == ".nwb"] nwbfile_paths_to_set = [ diff --git a/tests/imports.py b/tests/imports.py index 5f8b65e72..7ac95713b 100644 --- a/tests/imports.py +++ b/tests/imports.py @@ -68,6 +68,7 @@ def test_tools(self): "get_package_version", "is_package_installed", "deploy_process", + "data_transfers", "LocalPathExpander", "get_module", ] diff --git a/tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml b/tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml new file mode 100644 index 000000000..adf590d3a --- /dev/null +++ b/tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml @@ -0,0 +1,66 @@ +metadata: + NWBFile: + lab: My Lab + institution: My Institution + +conversion_options: + stub_test: True + +data_interfaces: + ap: SpikeGLXRecordingInterface + lf: SpikeGLXRecordingInterface + phy: PhySortingInterface + +upload_to_dandiset: "200560" + +experiments: + ymaze: + metadata: + NWBFile: + session_description: Subject navigating a Y-shaped maze. + + sessions: + - nwbfile_name: example_converter_spec_1 + source_data: + ap: + file_path: spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.ap.bin + metadata: + NWBFile: + session_start_time: "2020-10-09T21:19:09+00:00" + session_id: "test-yaml-1" + Subject: + subject_id: "yaml-1" + sex: F + age: P35D + species: Mus musculus + - nwbfile_name: example_converter_spec_2.nwb + metadata: + NWBFile: + session_start_time: "2020-10-10T21:19:09+00:00" + session_id: "test-yaml-2" + Subject: + subject_id: "yaml-002" + sex: F + age: P35D + species: Mus musculus + source_data: + lf: + file_path: spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.lf.bin + + open_explore: + sessions: + - nwbfile_name: example_converter_spec_3 + source_data: + lf: + file_path: spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.lf.bin + phy: + folder_path: phy/phy_example_0/ + metadata: + NWBFile: + session_start_time: "2020-10-11T21:19:09+00:00" + session_id: test YAML 3 + Subject: + subject_id: YAML Subject Name + sex: F + age: P35D + species: Mus musculus diff --git a/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py b/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py index e46e25352..5a623d141 100644 --- a/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py +++ b/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py @@ -19,6 +19,7 @@ "fname", [ "GIN_conversion_specification.yml", + "GIN_conversion_specification_dandi_upload.yml", "GIN_conversion_specification_missing_nwbfile_names.yml", "GIN_conversion_specification_no_nwbfile_name_or_other_metadata.yml", "GIN_conversion_specification_videos.yml", diff --git a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py new file mode 100644 index 000000000..c36d072e7 --- /dev/null +++ b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py @@ -0,0 +1,53 @@ +import os +import platform +import time +from datetime import datetime, timedelta +from pathlib import Path + +import dandi.dandiapi +import pytest +from packaging.version import Version + +from neuroconv import run_conversion_from_yaml + +from ..setup_paths import ECEPHY_DATA_PATH, OUTPUT_PATH + +DANDI_API_KEY = os.getenv("DANDI_API_KEY") +HAVE_DANDI_KEY = DANDI_API_KEY is not None and 
DANDI_API_KEY != "" # can be "" from external forks +_PYTHON_VERSION = platform.python_version() + + +@pytest.mark.skipif( + not HAVE_DANDI_KEY or Version(".".join(_PYTHON_VERSION.split(".")[:2])) != Version("3.12"), + reason="You must set your DANDI_API_KEY to run this test!", +) +def test_run_conversion_from_yaml_with_dandi_upload(): + path_to_test_yml_files = Path(__file__).parent / "conversion_specifications" + yaml_file_path = path_to_test_yml_files / "GIN_conversion_specification_dandi_upload.yml" + run_conversion_from_yaml( + specification_file_path=yaml_file_path, + data_folder_path=ECEPHY_DATA_PATH, + output_folder_path=OUTPUT_PATH, + overwrite=True, + ) + + time.sleep(60) # Give some buffer room for server to process before making assertions against DANDI API + + client = dandi.dandiapi.DandiAPIClient(api_url="https://api-staging.dandiarchive.org/api") + dandiset = client.get_dandiset("200560") + + expected_asset_paths = [ + "sub-yaml-1/sub-yaml-1_ses-test-yaml-1_ecephys.nwb", + "sub-yaml-002/sub-yaml-002_ses-test-yaml-2_ecephys.nwb", + "sub-YAML-Subject-Name/sub-YAML-Subject-Name_ses-test-YAML-3_ecephys.nwb", + ] + for asset_path in expected_asset_paths: + test_asset = dandiset.get_asset_by_path(path=asset_path) # Will error if not found + test_asset_metadata = test_asset.get_raw_metadata() + + # Past uploads may have created the same apparent file, so look at the modification time to ensure + # this test is actually testing the most recent upload + date_modified = datetime.fromisoformat( + test_asset_metadata["dateModified"].split("Z")[0] # Timezones look a little messy + ) + assert datetime.now() - date_modified < timedelta(minutes=10) From 737468c63f17f206488cbba57484e6c9df200ff5 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 10 Dec 2024 09:38:59 -0600 Subject: [PATCH 3/3] Support arbitrary multi probe and multi trigger structures in spikeglx (#1150) --- CHANGELOG.md | 2 + .../ecephys/spikeglx/spikeglxconverter.py | 31 ++++------ .../ecephys/spikeglx/spikeglxdatainterface.py | 56 +++++++++++++------ .../ecephys/spikeglx/spikeglxnidqinterface.py | 27 ++++++--- .../spikeglx_single_probe_metadata.json | 12 ++-- tests/test_on_data/ecephys/test_lfp.py | 4 +- .../ecephys/test_recording_interfaces.py | 4 +- .../ecephys/test_spikeglx_converter.py | 31 +++++----- 8 files changed, 95 insertions(+), 72 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae105b907..65d3515f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,11 +7,13 @@ ## Bug Fixes * datetime objects now can be validated as conversion options [#1139](https://github.com/catalystneuro/neuroconv/pull/1126) * Fix a bug where data in `DeepLabCutInterface` failed to write when `ndx-pose` was not imported. [#1144](https://github.com/catalystneuro/neuroconv/pull/1144) +* `SpikeGLXConverterPipe` converter now accepts multi-probe structures with multi-trigger and does not assume a specific folder structure [#1150](https://github.com/catalystneuro/neuroconv/pull/1150) ## Features * Propagate the `unit_electrode_indices` argument from the spikeinterface tools to `BaseSortingExtractorInterface`. 
This allows users to map units to the electrode table when adding sorting data [PR #1124](https://github.com/catalystneuro/neuroconv/pull/1124) * Imaging interfaces have a new conversion option `always_write_timestamps` that can be used to force writing timestamps even if neuroconv's heuristics indicates regular sampling rate [PR #1125](https://github.com/catalystneuro/neuroconv/pull/1125) * Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140) +* `SpikeGLXRecordingInterface` now also accepts `folder_path` making its behavior equivalent to SpikeInterface [#1150](https://github.com/catalystneuro/neuroconv/pull/1150) * Added the `rclone_transfer_batch_job` helper function for executing Rclone data transfers in AWS Batch jobs. [PR #1085](https://github.com/catalystneuro/neuroconv/pull/1085) * Added the `deploy_neuroconv_batch_job` helper function for deploying NeuroConv AWS Batch jobs. [PR #1086](https://github.com/catalystneuro/neuroconv/pull/1086) * YAML specification files now accept an outer keyword `upload_to_dandiset="< six-digit ID >"` to automatically upload the produced NWB files to the DANDI archive [PR #1089](https://github.com/catalystneuro/neuroconv/pull/1089) diff --git a/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxconverter.py b/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxconverter.py index 007c3177c..029955d24 100644 --- a/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxconverter.py +++ b/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxconverter.py @@ -29,8 +29,10 @@ def get_source_schema(cls): @classmethod def get_streams(cls, folder_path: DirectoryPath) -> list[str]: + "Return the stream ids available in the folder." from spikeinterface.extractors import SpikeGLXRecordingExtractor + # The first entry is the stream ids the second is the stream names return SpikeGLXRecordingExtractor.get_streams(folder_path=folder_path)[0] @validate_call @@ -61,28 +63,17 @@ def __init__( """ folder_path = Path(folder_path) - streams = streams or self.get_streams(folder_path=folder_path) + streams_ids = streams or self.get_streams(folder_path=folder_path) data_interfaces = dict() - for stream in streams: - if "ap" in stream: - probe_name = stream[:5] - file_path = ( - folder_path / f"{folder_path.stem}_{probe_name}" / f"{folder_path.stem}_t0.{probe_name}.ap.bin" - ) - es_key = f"ElectricalSeriesAP{probe_name.capitalize()}" - interface = SpikeGLXRecordingInterface(file_path=file_path, es_key=es_key) - elif "lf" in stream: - probe_name = stream[:5] - file_path = ( - folder_path / f"{folder_path.stem}_{probe_name}" / f"{folder_path.stem}_t0.{probe_name}.lf.bin" - ) - es_key = f"ElectricalSeriesLF{probe_name.capitalize()}" - interface = SpikeGLXRecordingInterface(file_path=file_path, es_key=es_key) - elif "nidq" in stream: - file_path = folder_path / f"{folder_path.stem}_t0.nidq.bin" - interface = SpikeGLXNIDQInterface(file_path=file_path) - data_interfaces.update({str(stream): interface}) # Without str() casting, is a numpy string + + nidq_streams = [stream_id for stream_id in streams_ids if stream_id == "nidq"] + electrical_streams = [stream_id for stream_id in streams_ids if stream_id not in nidq_streams] + for stream_id in electrical_streams: + data_interfaces[stream_id] = SpikeGLXRecordingInterface(folder_path=folder_path, stream_id=stream_id) + + for stream_id in nidq_streams: + data_interfaces[stream_id] = SpikeGLXNIDQInterface(folder_path=folder_path) super().__init__(data_interfaces=data_interfaces, 
verbose=verbose) diff --git a/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxdatainterface.py b/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxdatainterface.py index c15516431..00419e036 100644 --- a/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxdatainterface.py +++ b/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxdatainterface.py @@ -4,7 +4,7 @@ from typing import Optional import numpy as np -from pydantic import FilePath, validate_call +from pydantic import DirectoryPath, FilePath, validate_call from .spikeglx_utils import ( add_recording_extractor_properties, @@ -45,7 +45,6 @@ def get_source_schema(cls) -> dict: def _source_data_to_extractor_kwargs(self, source_data: dict) -> dict: extractor_kwargs = source_data.copy() - extractor_kwargs.pop("file_path") extractor_kwargs["folder_path"] = self.folder_path extractor_kwargs["all_annotations"] = True extractor_kwargs["stream_id"] = self.stream_id @@ -54,38 +53,59 @@ def _source_data_to_extractor_kwargs(self, source_data: dict) -> dict: @validate_call def __init__( self, - file_path: FilePath, + file_path: Optional[FilePath] = None, verbose: bool = True, es_key: Optional[str] = None, + folder_path: Optional[DirectoryPath] = None, + stream_id: Optional[str] = None, ): """ Parameters ---------- + folder_path: DirectoryPath + Folder path containing the binary files of the SpikeGLX recording. + stream_id: str, optional + Stream ID of the SpikeGLX recording. + Examples are 'nidq', 'imec0.ap', 'imec0.lf', 'imec1.ap', 'imec1.lf', etc. file_path : FilePathType Path to .bin file. Point to .ap.bin for SpikeGLXRecordingInterface and .lf.bin for SpikeGLXLFPInterface. verbose : bool, default: True Whether to output verbose text. - es_key : str, default: "ElectricalSeries" + es_key : str, the key to access the metadata of the ElectricalSeries. """ - self.stream_id = fetch_stream_id_for_spikelgx_file(file_path) - if es_key is None: - if "lf" in self.stream_id: - es_key = "ElectricalSeriesLF" - elif "ap" in self.stream_id: - es_key = "ElectricalSeriesAP" - else: - raise ValueError("Cannot automatically determine es_key from path") - file_path = Path(file_path) - self.folder_path = file_path.parent + if file_path is not None and stream_id is None: + self.stream_id = fetch_stream_id_for_spikelgx_file(file_path) + self.folder_path = Path(file_path).parent + + else: + self.stream_id = stream_id + self.folder_path = Path(folder_path) super().__init__( - file_path=file_path, + folder_path=folder_path, verbose=verbose, es_key=es_key, ) - self.source_data["file_path"] = str(file_path) - self.meta = self.recording_extractor.neo_reader.signals_info_dict[(0, self.stream_id)]["meta"] + + signal_info_key = (0, self.stream_id) # Key format is (segment_index, stream_id) + self._signals_info_dict = self.recording_extractor.neo_reader.signals_info_dict[signal_info_key] + self.meta = self._signals_info_dict["meta"] + + if es_key is None: + stream_kind = self._signals_info_dict["stream_kind"] # ap or lf + stream_kind_caps = stream_kind.upper() + device = self._signals_info_dict["device"].capitalize() # imec0, imec1, etc. 
+ + electrical_series_name = f"ElectricalSeries{stream_kind_caps}" + + # Add imec{probe_index} to the electrical series name when there are multiple probes + # or undefined, `typeImEnabled` is present in the meta of all the production probes + self.probes_enabled_in_run = int(self.meta.get("typeImEnabled", 0)) + if self.probes_enabled_in_run != 1: + electrical_series_name += f"{device}" + + self.es_key = electrical_series_name # Set electrodes properties add_recording_extractor_properties(self.recording_extractor) @@ -100,7 +120,7 @@ def get_metadata(self) -> dict: device = get_device_metadata(self.meta) # Should follow pattern 'Imec0', 'Imec1', etc. - probe_name = self.stream_id[:5].capitalize() + probe_name = self._signals_info_dict["device"].capitalize() device["name"] = f"Neuropixel{probe_name}" # Add groups metadata diff --git a/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxnidqinterface.py b/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxnidqinterface.py index 3cf50080a..1d7079716 100644 --- a/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxnidqinterface.py +++ b/src/neuroconv/datainterfaces/ecephys/spikeglx/spikeglxnidqinterface.py @@ -1,7 +1,8 @@ from pathlib import Path +from typing import Optional import numpy as np -from pydantic import ConfigDict, FilePath, validate_call +from pydantic import ConfigDict, DirectoryPath, FilePath, validate_call from .spikeglx_utils import get_session_start_time from ..baserecordingextractorinterface import BaseRecordingExtractorInterface @@ -29,7 +30,6 @@ def get_source_schema(cls) -> dict: def _source_data_to_extractor_kwargs(self, source_data: dict) -> dict: extractor_kwargs = source_data.copy() - extractor_kwargs.pop("file_path") extractor_kwargs["folder_path"] = self.folder_path extractor_kwargs["stream_id"] = self.stream_id return extractor_kwargs @@ -37,10 +37,11 @@ def _source_data_to_extractor_kwargs(self, source_data: dict) -> dict: @validate_call(config=ConfigDict(arbitrary_types_allowed=True)) def __init__( self, - file_path: FilePath, + file_path: Optional[FilePath] = None, verbose: bool = True, load_sync_channel: bool = False, es_key: str = "ElectricalSeriesNIDQ", + folder_path: Optional[DirectoryPath] = None, ): """ Read channel data from the NIDQ board for the SpikeGLX recording. @@ -49,6 +50,8 @@ def __init__( Parameters ---------- + folder_path : DirectoryPath + Path to the folder containing the .nidq.bin file. file_path : FilePathType Path to .nidq.bin file. 
verbose : bool, default: True @@ -59,10 +62,17 @@ def __init__( es_key : str, default: "ElectricalSeriesNIDQ" """ - self.file_path = Path(file_path) - self.folder_path = self.file_path.parent + if file_path is None and folder_path is None: + raise ValueError("Either 'file_path' or 'folder_path' must be provided.") + + if file_path is not None: + file_path = Path(file_path) + self.folder_path = file_path.parent + + if folder_path is not None: + self.folder_path = Path(folder_path) + super().__init__( - file_path=self.file_path, verbose=verbose, load_sync_channel=load_sync_channel, es_key=es_key, @@ -72,7 +82,10 @@ def __init__( self.recording_extractor.set_property( key="group_name", values=["NIDQChannelGroup"] * self.recording_extractor.get_num_channels() ) - self.meta = self.recording_extractor.neo_reader.signals_info_dict[(0, "nidq")]["meta"] + + signal_info_key = (0, self.stream_id) # Key format is (segment_index, stream_id) + self._signals_info_dict = self.recording_extractor.neo_reader.signals_info_dict[signal_info_key] + self.meta = self._signals_info_dict["meta"] def get_metadata(self) -> dict: metadata = super().get_metadata() diff --git a/tests/test_on_data/ecephys/spikeglx_single_probe_metadata.json b/tests/test_on_data/ecephys/spikeglx_single_probe_metadata.json index 637124cd0..20f11742b 100644 --- a/tests/test_on_data/ecephys/spikeglx_single_probe_metadata.json +++ b/tests/test_on_data/ecephys/spikeglx_single_probe_metadata.json @@ -29,9 +29,9 @@ "device": "NIDQBoard" } ], - "ElectricalSeriesAPImec0": { - "name": "ElectricalSeriesAPImec0", - "description": "Acquisition traces for the ElectricalSeriesAPImec0." + "ElectricalSeriesAP": { + "name": "ElectricalSeriesAP", + "description": "Acquisition traces for the ElectricalSeriesAP." }, "Electrodes": [ { @@ -51,9 +51,9 @@ "name": "ElectricalSeriesNIDQ", "description": "Raw acquisition traces from the NIDQ (.nidq.bin) channels." }, - "ElectricalSeriesLFImec0": { - "name": "ElectricalSeriesLFImec0", - "description": "Acquisition traces for the ElectricalSeriesLFImec0." + "ElectricalSeriesLF": { + "name": "ElectricalSeriesLF", + "description": "Acquisition traces for the ElectricalSeriesLF." 
} } } diff --git a/tests/test_on_data/ecephys/test_lfp.py b/tests/test_on_data/ecephys/test_lfp.py index c46f8d297..010516d86 100644 --- a/tests/test_on_data/ecephys/test_lfp.py +++ b/tests/test_on_data/ecephys/test_lfp.py @@ -57,9 +57,7 @@ class TestEcephysLFPNwbConversions(unittest.TestCase): param( data_interface=SpikeGLXRecordingInterface, interface_kwargs=dict( - file_path=( - DATA_PATH / "spikeglx" / "Noise4Sam_g0" / "Noise4Sam_g0_imec0" / "Noise4Sam_g0_t0.imec0.lf.bin" - ) + folder_path=DATA_PATH / "spikeglx" / "Noise4Sam_g0" / "Noise4Sam_g0_imec0", stream_id="imec0.lf" ), expected_write_module="raw", ), diff --git a/tests/test_on_data/ecephys/test_recording_interfaces.py b/tests/test_on_data/ecephys/test_recording_interfaces.py index 7677ded22..0520b5b42 100644 --- a/tests/test_on_data/ecephys/test_recording_interfaces.py +++ b/tests/test_on_data/ecephys/test_recording_interfaces.py @@ -641,9 +641,7 @@ def test_extracted_metadata(self, setup_interface): class TestSpikeGLXRecordingInterface(RecordingExtractorInterfaceTestMixin): data_interface_cls = SpikeGLXRecordingInterface interface_kwargs = dict( - file_path=str( - ECEPHY_DATA_PATH / "spikeglx" / "Noise4Sam_g0" / "Noise4Sam_g0_imec0" / "Noise4Sam_g0_t0.imec0.ap.bin" - ) + folder_path=ECEPHY_DATA_PATH / "spikeglx" / "Noise4Sam_g0" / "Noise4Sam_g0_imec0", stream_id="imec0.ap" ) save_directory = OUTPUT_PATH diff --git a/tests/test_on_data/ecephys/test_spikeglx_converter.py b/tests/test_on_data/ecephys/test_spikeglx_converter.py index af98789c1..970a815af 100644 --- a/tests/test_on_data/ecephys/test_spikeglx_converter.py +++ b/tests/test_on_data/ecephys/test_spikeglx_converter.py @@ -33,10 +33,10 @@ def assertNWBFileStructure(self, nwbfile_path: FilePath, expected_session_start_ with NWBHDF5IO(path=nwbfile_path) as io: nwbfile = io.read() - assert nwbfile.session_start_time == expected_session_start_time + assert nwbfile.session_start_time.replace(tzinfo=None) == expected_session_start_time - assert "ElectricalSeriesAPImec0" in nwbfile.acquisition - assert "ElectricalSeriesLFImec0" in nwbfile.acquisition + assert "ElectricalSeriesAP" in nwbfile.acquisition + assert "ElectricalSeriesLF" in nwbfile.acquisition assert "ElectricalSeriesNIDQ" in nwbfile.acquisition assert len(nwbfile.acquisition) == 3 @@ -74,7 +74,7 @@ def test_single_probe_spikeglx_converter(self): nwbfile_path = self.tmpdir / "test_single_probe_spikeglx_converter.nwb" converter.run_conversion(nwbfile_path=nwbfile_path, metadata=metadata) - expected_session_start_time = datetime(2020, 11, 3, 10, 35, 10).astimezone() + expected_session_start_time = datetime(2020, 11, 3, 10, 35, 10) self.assertNWBFileStructure(nwbfile_path=nwbfile_path, expected_session_start_time=expected_session_start_time) def test_in_converter_pipe(self): @@ -84,7 +84,7 @@ def test_in_converter_pipe(self): nwbfile_path = self.tmpdir / "test_spikeglx_converter_in_converter_pipe.nwb" converter_pipe.run_conversion(nwbfile_path=nwbfile_path) - expected_session_start_time = datetime(2020, 11, 3, 10, 35, 10).astimezone() + expected_session_start_time = datetime(2020, 11, 3, 10, 35, 10) self.assertNWBFileStructure(nwbfile_path=nwbfile_path, expected_session_start_time=expected_session_start_time) def test_in_nwbconverter(self): @@ -101,7 +101,7 @@ class TestConverter(NWBConverter): nwbfile_path = self.tmpdir / "test_spikeglx_converter_in_nwbconverter.nwb" converter.run_conversion(nwbfile_path=nwbfile_path) - expected_session_start_time = datetime(2020, 11, 3, 10, 35, 10).astimezone() + 
expected_session_start_time = datetime(2020, 11, 3, 10, 35, 10) self.assertNWBFileStructure(nwbfile_path=nwbfile_path, expected_session_start_time=expected_session_start_time) @@ -118,7 +118,9 @@ def assertNWBFileStructure(self, nwbfile_path: FilePath, expected_session_start_ with NWBHDF5IO(path=nwbfile_path) as io: nwbfile = io.read() - assert nwbfile.session_start_time == expected_session_start_time + # Do the comparison without timezone information to avoid CI timezone issues + # The timezone is set by pynbw automatically + assert nwbfile.session_start_time.replace(tzinfo=None) == expected_session_start_time # TODO: improve name of segments using 'Segment{index}' for clarity assert "ElectricalSeriesAPImec00" in nwbfile.acquisition @@ -129,7 +131,7 @@ def assertNWBFileStructure(self, nwbfile_path: FilePath, expected_session_start_ assert "ElectricalSeriesLFImec01" in nwbfile.acquisition assert "ElectricalSeriesLFImec10" in nwbfile.acquisition assert "ElectricalSeriesLFImec11" in nwbfile.acquisition - assert len(nwbfile.acquisition) == 8 + assert len(nwbfile.acquisition) == 16 assert "NeuropixelImec0" in nwbfile.devices assert "NeuropixelImec1" in nwbfile.devices @@ -141,7 +143,7 @@ def assertNWBFileStructure(self, nwbfile_path: FilePath, expected_session_start_ def test_multi_probe_spikeglx_converter(self): converter = SpikeGLXConverterPipe( - folder_path=SPIKEGLX_PATH / "multi_trigger_multi_gate" / "SpikeGLX" / "5-19-2022-CI0" / "5-19-2022-CI0_g0" + folder_path=SPIKEGLX_PATH / "multi_trigger_multi_gate" / "SpikeGLX" / "5-19-2022-CI0" ) metadata = converter.get_metadata() @@ -161,13 +163,12 @@ def test_multi_probe_spikeglx_converter(self): expected_device_metadata = expected_ecephys_metadata.pop("Device") assert device_metadata == expected_device_metadata - assert test_ecephys_metadata == expected_ecephys_metadata nwbfile_path = self.tmpdir / "test_multi_probe_spikeglx_converter.nwb" converter.run_conversion(nwbfile_path=nwbfile_path, metadata=metadata) - expected_session_start_time = datetime(2022, 5, 19, 17, 37, 47).astimezone() + expected_session_start_time = datetime(2022, 5, 19, 17, 37, 47) self.assertNWBFileStructure(nwbfile_path=nwbfile_path, expected_session_start_time=expected_session_start_time) @@ -193,7 +194,7 @@ def test_electrode_table_writing(tmp_path): np.testing.assert_array_equal(saved_channel_names, expected_channel_names_nidq) # Test AP - electrical_series = nwbfile.acquisition["ElectricalSeriesAPImec0"] + electrical_series = nwbfile.acquisition["ElectricalSeriesAP"] ap_electrodes_table_region = electrical_series.electrodes region_indices = ap_electrodes_table_region.data recording_extractor = converter.data_interface_objects["imec0.ap"].recording_extractor @@ -203,7 +204,7 @@ def test_electrode_table_writing(tmp_path): np.testing.assert_array_equal(saved_channel_names, expected_channel_names_ap) # Test LF - electrical_series = nwbfile.acquisition["ElectricalSeriesLFImec0"] + electrical_series = nwbfile.acquisition["ElectricalSeriesLF"] lf_electrodes_table_region = electrical_series.electrodes region_indices = lf_electrodes_table_region.data recording_extractor = converter.data_interface_objects["imec0.lf"].recording_extractor @@ -222,7 +223,7 @@ def test_electrode_table_writing(tmp_path): # Test round trip with spikeinterface recording_extractor_ap = NwbRecordingExtractor( file_path=nwbfile_path, - electrical_series_name="ElectricalSeriesAPImec0", + electrical_series_name="ElectricalSeriesAP", ) channel_ids = recording_extractor_ap.get_channel_ids() @@ -230,7 +231,7 
@@ def test_electrode_table_writing(tmp_path): recording_extractor_lf = NwbRecordingExtractor( file_path=nwbfile_path, - electrical_series_name="ElectricalSeriesLFImec0", + electrical_series_name="ElectricalSeriesLF", ) channel_ids = recording_extractor_lf.get_channel_ids()
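
For reference, the RefResolver-to-Registry migration in PATCH 1/3 boils down to the pattern below. This is a minimal, self-contained sketch: the two schemas are invented stand-ins for yaml_conversion_specification_schema.json and metadata_schema.json, and only the mechanism (registering the referenced schema as a Resource in a Registry and handing that Registry to jsonschema.validate) mirrors the patch.

# Sketch of the Registry-based validation pattern from PATCH 1/3.
# The schema contents here are illustrative; only the mechanism mirrors the patch.
from jsonschema import validate
from referencing import Registry, Resource

# The referenced schema, registered under the same name the "$ref" uses.
metadata_schema = {
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "type": "object",
    "properties": {"session_id": {"type": "string"}},
}

# The referring schema points at the registered resource by name.
specification_schema = {
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "type": "object",
    "properties": {"metadata": {"$ref": "metadata_schema.json"}},
}

# Load the referenced schema into a registry instead of building a RefResolver
# with a platform-dependent file:// base URI.
registry = Registry().with_resource(
    "metadata_schema.json", Resource.from_contents(metadata_schema)
)

# jsonschema (>= 4.18) forwards the registry to the validator it constructs.
validate(
    instance={"metadata": {"session_id": "test-yaml-1"}},
    schema=specification_schema,
    registry=registry,
)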
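
The DANDI upload path added in PATCH 2/3 is driven entirely from the YAML specification: the file must carry the new top-level upload_to_dandiset keyword, every uploaded session must define a session_id in its NWBFile metadata, and the DANDI_API_KEY environment variable must be set. A hedged sketch of invoking it follows; the file and folder paths are placeholders.

# Sketch of running a YAML conversion that also uploads to DANDI (PATCH 2/3).
# Paths are placeholders; the spec file is assumed to set upload_to_dandiset.
import os
from pathlib import Path

from neuroconv import run_conversion_from_yaml

# The patch raises a ValueError early if this environment variable is missing.
assert "DANDI_API_KEY" in os.environ, "Set DANDI_API_KEY before running."

run_conversion_from_yaml(
    specification_file_path=Path("conversion_specifications/my_dandi_spec.yml"),  # hypothetical spec
    data_folder_path=Path("/data/ephys"),         # hypothetical source data root
    output_folder_path=Path("/data/nwb_output"),  # NWB files are written here, then uploaded
    overwrite=True,
)
# Per the patch, dandiset IDs >= 200000 are routed to the DANDI staging server.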
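
PATCH 3/3 lets both the recording interface and the converter be pointed at a SpikeGLX run folder instead of individual .bin files. A minimal sketch, assuming a local copy of the Noise4Sam test data (the paths below are placeholders):

# Sketch of the folder-based SpikeGLX entry points after PATCH 3/3.
from neuroconv.converters import SpikeGLXConverterPipe
from neuroconv.datainterfaces import SpikeGLXRecordingInterface

probe_folder = "/data/spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0"  # hypothetical location

# A single band of a single probe can now be selected by stream_id
# instead of pointing at its .ap.bin or .lf.bin file directly.
ap_interface = SpikeGLXRecordingInterface(folder_path=probe_folder, stream_id="imec0.ap")

# The converter no longer assumes a fixed sub-folder layout: it queries the
# available streams itself and builds one interface per stream.
converter = SpikeGLXConverterPipe(folder_path="/data/spikeglx/Noise4Sam_g0")  # hypothetical
print(list(converter.data_interface_objects))  # e.g. ['imec0.ap', 'imec0.lf', 'nidq']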