diff --git a/.github/workflows/add-to-dashboard.yml b/.github/workflows/add-to-dashboard.yml new file mode 100644 index 0000000..e72d989 --- /dev/null +++ b/.github/workflows/add-to-dashboard.yml @@ -0,0 +1,19 @@ +name: Add Issue or Pull Request to Dashboard + +on: + issues: + types: + - opened + pull_request: + types: + - opened + +jobs: + add-to-project: + name: Add issue or pull request to project + runs-on: ubuntu-latest + steps: + - uses: actions/add-to-project@v0.5.0 + with: + project-url: https://github.com/orgs/catalystneuro/projects/3 + github-token: ${{ secrets.PROJECT_TOKEN }} diff --git a/.github/workflows/auto-publish.yml b/.github/workflows/auto-publish.yml new file mode 100644 index 0000000..31fb446 --- /dev/null +++ b/.github/workflows/auto-publish.yml @@ -0,0 +1,34 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +# For the template repo, we simply won't specify a PYPI token. We also won't do any actual releases. +# For your actual conversion project using this template, add the PYPI_API_TOKEN to your secrets. 
+ +name: Upload Package to PyPI + +on: + release: + types: [published] + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install wheel + - name: Build package + run: | + python setup.py sdist bdist_wheel + - name: pypi-publish + uses: pypa/gh-action-pypi-publish@release/v1 + with: + verbose: true + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.github/workflows/test-install.yml b/.github/workflows/test-install.yml new file mode 100644 index 0000000..b67a8aa --- /dev/null +++ b/.github/workflows/test-install.yml @@ -0,0 +1,31 @@ + +name: Installation +on: + workflow_dispatch: + schedule: + - cron: "0 0 1 * *" # First day of the month + +jobs: + run: + name: Installation on (${{ matrix.os }} with Python ${{ matrix.python-version }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest", "macos-latest", "windows-latest"] + python-version: ["3.8", "3.9", "3.10"] + steps: + - uses: actions/checkout@v3 + - run: git fetch --prune --unshallow --tags + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install pip + run: | + python -m pip install --upgrade pip + pip3 install packaging + - name: Install package + run: pip install -e . 
+ - name: Test module load + run: python -c "import kind_lab_to_nwb" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7347b6c --- /dev/null +++ b/.gitignore @@ -0,0 +1,149 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder repo_name settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# vscode +.vscode/* +.vscode/settings.json +.vscode/tasks.json +.vscode/launch.json +.vscode/extensions.json +*.code-workspace + +# pycharm +.idea/* + +# Local History for Visual Studio Code +.history/ + +#Mac +.DS_Store + +# NWB files +**.nwb diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..5422b47 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,25 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.2.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace +- repo: https://github.com/psf/black + rev: 22.8.0 + hooks: + - id: black + exclude: ^docs/ + +- repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell + additional_dependencies: + - tomli + +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.9 + hooks: + - id: ruff + args: [ --fix ] diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..4990656 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include *.yml +include *.json +include *.txt diff --git a/README.md b/README.md index b015289..e22cd81 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,107 @@ # kind-lab-to-nwb -NWB Conversion project for the Kind lab. +NWB conversion scripts for Kind lab data to the +[Neurodata Without Borders](https://nwb-overview.readthedocs.io/) data format. 
+ + +## Installation +## Basic installation + +You can install the latest release of the package with pip: + +``` +pip install kind-lab-to-nwb +``` + +We recommend that you install the package inside a [virtual environment](https://docs.python.org/3/tutorial/venv.html). +A simple way of doing this is to use a [conda environment](https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/environments.html) +from the `conda` package manager ([installation instructions](https://docs.conda.io/en/latest/miniconda.html)). +Detailed instructions on how to use conda +environments can be found in their [documentation](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html). + +### Running a specific conversion +Once you have installed the package with pip, you can run any of the conversion scripts in a notebook or a python file: + +https://github.com/catalystneuro/kind-lab-to-nwb/tree/main/src/kind_lab_to_nwb/arc_ecephys_2024/convert_session.py + +Copy or download this file and run the script with the following command: + +``` +python convert_session.py +``` + +## Installation from GitHub +Another option is to install the package directly from GitHub. This option has the advantage that the source code +can be modified if you need to amend some of the code we originally provided to adapt to future experimental +differences. To install the conversion from GitHub you will need to use `git` ([installation instructions](https://github.com/git-guides/install-git)). +We also recommend the installation of `conda` ([installation instructions](https://docs.conda.io/en/latest/miniconda.html)) as it contains all the required +machinery in a single and simple install. 
+ +From a terminal (note that conda should install one in your system) you can do the following: + +``` +git clone https://github.com/catalystneuro/kind-lab-to-nwb +cd kind-lab-to-nwb +conda env create --file make_env.yml +conda activate kind_lab_to_nwb_env +``` + +This creates a [conda environment](https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/environments.html) which isolates the conversion code from your system libraries. We recommend that you run all your conversion related tasks and analysis from the created environment in order to minimize issues related to package dependencies. + +Alternatively, if you want to avoid conda altogether (for example if you use another virtual environment tool) you +can install the repository with the following commands using only pip: + +``` +git clone https://github.com/catalystneuro/kind-lab-to-nwb +cd kind-lab-to-nwb +pip install --editable . +``` + +Note: +both of the methods above install the repository in [editable mode](https://pip.pypa.io/en/stable/cli/pip_install/#editable-installs). 
+ +### Running a specific conversion +If the project has more than one conversion, you can install the requirements for a specific conversion with the following command: +``` +pip install --editable .[arc_ecephys_2024] +``` + +You can run a specific conversion with the following command: +``` +python src/kind_lab_to_nwb/arc_ecephys_2024/convert_session.py +``` + +## Repository structure +Each conversion is organized in a directory of its own in the `src` directory: + + kind-lab-to-nwb/ + ├── LICENSE + ├── make_env.yml + ├── pyproject.toml + ├── README.md + ├── requirements.txt + ├── setup.py + └── src + ├── kind_lab_to_nwb + │ └── arc_ecephys_2024 + │ ├── notes.md + │ ├── behaviorinterface.py + │ ├── convert_session.py + │ ├── metadata.yaml + │ ├── nwbconverter.py + │ ├── convert_all_sessions.py + │ └── __init__.py + │ ├── conversion_directory_b + + └── __init__.py + +For example, for the conversion `arc_ecephys_2024` you can find a directory located in `src/kind_lab_to_nwb/arc_ecephys_2024`. +Inside each conversion directory you can find the following files: + + +* `convert_session.py`: this script defines the function to convert one full session of the conversion. +* `metadata.yaml`: metadata in yaml format for this specific conversion. +* `behaviorinterface.py`: the behavior interface. Usually ad-hoc for each conversion. +* `nwbconverter.py`: the place where the `NWBConverter` class is defined. +* `notes.md`: notes and comments concerning this specific conversion. + +The directory might contain other files that are necessary for the conversion but those are the central ones. diff --git a/make_env.yml b/make_env.yml new file mode 100644 index 0000000..5f6d223 --- /dev/null +++ b/make_env.yml @@ -0,0 +1,9 @@ +name: kind_lab_to_nwb_env +channels: +- conda-forge +- defaults +dependencies: +- python>=3.11 +- pip +- pip: + - --editable . 
# This calls the setup and therefore requirements minimal diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..187239a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,85 @@ +[project] +name = "kind-lab-to-nwb" +version = "0.0.1" +description = "NWB conversion scripts, functions, and classes for Kind lab conversion" +readme = "README.md" +authors = [{ name = "CatalystNeuro", email = "ben.dichter@catalystneuro.com" }] +maintainers = [{ name = "CatalystNeuro", email = "ben.dichter@catalystneuro.com" }] +license = { file = "LICENSE" } +requires-python = ">=3.9" +classifiers = [ + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] + +dependencies = [ + "neuroconv", + "nwbinspector", +] + +[project.urls] +Repository="https://github.com/catalystneuro/kind-lab-to-nwb" + +[project.optional-dependencies] +arc_ecephys_2024 = [ + "neuroconv==0.6.5" # Pinned dependencies to specific versions +] + +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.packages.find] +where = ["src"] +include = ["*"] + +[tool.black] +line-length = 120 +target-version = ['py39', 'py310', 'py311'] +include = '\.pyi?$' +extend-exclude = ''' +/( + \.toml + |\.yml + |\.md + |\.txt + |\.sh + |\.git + |\.ini + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | build + | dist +)/ +''' + +[tool.isort] +profile = "black" +reverse_relative = true +known_first_party = ["kind_lab_to_nwb"] + +[tool.codespell] +skip = '.git*,*.pdf,*.css' +check-hidden = true +ignore-words-list = 'assertin' + +[tool.ruff] + +[tool.ruff.lint] +select = [ + #"F401", # Unused import + "I", # All isort rules +] +fixable = ["ALL"] + + +[tool.ruff.lint.isort] +relative-imports-order = "closest-to-furthest" +known-first-party = ["neuroconv"] diff --git a/src/kind_lab_to_nwb/__init__.py 
b/src/kind_lab_to_nwb/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/kind_lab_to_nwb/another_conversion/__init__.py b/src/kind_lab_to_nwb/another_conversion/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/kind_lab_to_nwb/arc_ecephys_2024/__init__.py b/src/kind_lab_to_nwb/arc_ecephys_2024/__init__.py new file mode 100644 index 0000000..da34d1d --- /dev/null +++ b/src/kind_lab_to_nwb/arc_ecephys_2024/__init__.py @@ -0,0 +1 @@ +from .nwbconverter import ArcEcephys2024NWBConverter diff --git a/src/kind_lab_to_nwb/arc_ecephys_2024/behaviorinterface.py b/src/kind_lab_to_nwb/arc_ecephys_2024/behaviorinterface.py new file mode 100644 index 0000000..23724d2 --- /dev/null +++ b/src/kind_lab_to_nwb/arc_ecephys_2024/behaviorinterface.py @@ -0,0 +1,37 @@ +"""Primary class for converting experiment-specific behavior.""" + +from pynwb.file import NWBFile + +from neuroconv.basedatainterface import ( + BaseDataInterface, +) +from neuroconv.utils import ( + DeepDict, +) + + +class ArcEcephys2024BehaviorInterface(BaseDataInterface): + """Behavior interface for arc_ecephys_2024 conversion""" + + keywords = ["behavior"] + + def __init__(self): + # This should load the data lazily and prepare variables you need + pass + + def get_metadata( + self, + ) -> DeepDict: + # Automatically retrieve as much metadata as possible from the source files available + metadata = super().get_metadata() + + return metadata + + def add_to_nwbfile( + self, + nwbfile: NWBFile, + metadata: dict, + ): + # All the custom code to add the data the nwbfile + + raise NotImplementedError() diff --git a/src/kind_lab_to_nwb/arc_ecephys_2024/convert_all_sessions.py b/src/kind_lab_to_nwb/arc_ecephys_2024/convert_all_sessions.py new file mode 100644 index 0000000..5df15ef --- /dev/null +++ b/src/kind_lab_to_nwb/arc_ecephys_2024/convert_all_sessions.py @@ -0,0 +1,126 @@ +"""Primary script to run to convert all sessions in a dataset using session_to_nwb.""" + +import 
traceback +from concurrent.futures import ( + ProcessPoolExecutor, + as_completed, +) +from pathlib import Path +from pprint import pformat +from typing import Union + +from tqdm import tqdm + +from .convert_session import ( + session_to_nwb, +) + + +def dataset_to_nwb( + *, + data_dir_path: Union[str, Path], + output_dir_path: Union[str, Path], + max_workers: int = 1, + verbose: bool = True, +): + """Convert the entire dataset to NWB. + + Parameters + ---------- + data_dir_path : Union[str, Path] + The path to the directory containing the raw data. + output_dir_path : Union[str, Path] + The path to the directory where the NWB files will be saved. + max_workers : int, optional + The number of workers to use for parallel processing, by default 1 + verbose : bool, optional + Whether to print verbose output, by default True + """ + data_dir_path = Path(data_dir_path) + session_to_nwb_kwargs_per_session = get_session_to_nwb_kwargs_per_session( + data_dir_path=data_dir_path, + ) + + futures = [] + with ProcessPoolExecutor(max_workers=max_workers) as executor: + for session_to_nwb_kwargs in session_to_nwb_kwargs_per_session: + session_to_nwb_kwargs["output_dir_path"] = output_dir_path + session_to_nwb_kwargs["verbose"] = verbose + exception_file_path = data_dir_path / f"ERROR_.txt" # Add error file path here + futures.append( + executor.submit( + safe_session_to_nwb, + session_to_nwb_kwargs=session_to_nwb_kwargs, + exception_file_path=exception_file_path, + ) + ) + for _ in tqdm( + as_completed(futures), + total=len(futures), + ): + pass + + +def safe_session_to_nwb( + *, + session_to_nwb_kwargs: dict, + exception_file_path: Union[Path, str], +): + """Convert a session to NWB while handling any errors by recording error messages to the exception_file_path. + + Parameters + ---------- + session_to_nwb_kwargs : dict + The arguments for session_to_nwb. + exception_file_path : Path + The path to the file where the exception messages will be saved. 
+ """ + exception_file_path = Path(exception_file_path) + try: + session_to_nwb(**session_to_nwb_kwargs) + except Exception: + with open( + exception_file_path, + mode="w", + ) as f: + f.write(f"session_to_nwb_kwargs: \n {pformat(session_to_nwb_kwargs)}\n\n") + f.write(traceback.format_exc()) + + +def get_session_to_nwb_kwargs_per_session( + *, + data_dir_path: Union[str, Path], +): + """Get the kwargs for session_to_nwb for each session in the dataset. + + Parameters + ---------- + data_dir_path : Union[str, Path] + The path to the directory containing the raw data. + + Returns + ------- + list[dict[str, Any]] + A list of dictionaries containing the kwargs for session_to_nwb for each session. + """ + ##### + # # Implement this function to return the kwargs for session_to_nwb for each session + # This can be a specific list with hard-coded sessions, a path expansion or any conversion specific logic that you might need + ##### + raise NotImplementedError + + +if __name__ == "__main__": + + # Parameters for conversion + data_dir_path = Path("/Directory/With/Raw/Formats/") + output_dir_path = Path("~/conversion_nwb/") + max_workers = 1 + verbose = False + + dataset_to_nwb( + data_dir_path=data_dir_path, + output_dir_path=output_dir_path, + max_workers=max_workers, + verbose=verbose, + ) diff --git a/src/kind_lab_to_nwb/arc_ecephys_2024/convert_session.py b/src/kind_lab_to_nwb/arc_ecephys_2024/convert_session.py new file mode 100644 index 0000000..8379464 --- /dev/null +++ b/src/kind_lab_to_nwb/arc_ecephys_2024/convert_session.py @@ -0,0 +1,89 @@ +"""Primary script to run to convert an entire session of data using the NWBConverter.""" + +import datetime +from pathlib import Path +from typing import Union +from zoneinfo import ZoneInfo + +from kind_lab_to_nwb.arc_ecephys_2024 import ArcEcephys2024NWBConverter +from neuroconv.utils import ( + dict_deep_update, + load_dict_from_file, +) + + +def session_to_nwb( + data_dir_path: Union[str, Path], + output_dir_path: 
Union[str, Path], + stub_test: bool = False, +): + + data_dir_path = Path(data_dir_path) + output_dir_path = Path(output_dir_path) + if stub_test: + output_dir_path = output_dir_path / "nwb_stub" + output_dir_path.mkdir( + parents=True, + exist_ok=True, + ) + + session_id = "subject_identifier_usually" + nwbfile_path = output_dir_path / f"{session_id}.nwb" + + source_data = dict() + conversion_options = dict() + + # Add Recording + source_data.update(dict(Recording=dict())) + conversion_options.update(dict(Recording=dict(stub_test=stub_test))) + + # Add Sorting + source_data.update(dict(Sorting=dict())) + conversion_options.update(dict(Sorting=dict())) + + # Add Behavior + source_data.update(dict(Behavior=dict())) + conversion_options.update(dict(Behavior=dict())) + + converter = ArcEcephys2024NWBConverter(source_data=source_data) + + # Add datetime to conversion + metadata = converter.get_metadata() + date = datetime.datetime( + year=2020, + month=1, + day=1, + tzinfo=ZoneInfo("US/Eastern"), + ) + metadata["NWBFile"]["session_start_time"] = date + + # Update default metadata with the editable in the corresponding yaml file + editable_metadata_path = Path(__file__).parent / "metadata.yaml" + editable_metadata = load_dict_from_file(editable_metadata_path) + metadata = dict_deep_update( + metadata, + editable_metadata, + ) + + metadata["Subject"]["subject_id"] = "a_subject_id" # Modify here or in the yaml file + + # Run conversion + converter.run_conversion( + metadata=metadata, + nwbfile_path=nwbfile_path, + conversion_options=conversion_options, + ) + + +if __name__ == "__main__": + + # Parameters for conversion + data_dir_path = Path("/Directory/With/Raw/Formats/") + output_dir_path = Path("~/conversion_nwb/") + stub_test = False + + session_to_nwb( + data_dir_path=data_dir_path, + output_dir_path=output_dir_path, + stub_test=stub_test, + ) diff --git a/src/kind_lab_to_nwb/arc_ecephys_2024/metadata.yaml 
b/src/kind_lab_to_nwb/arc_ecephys_2024/metadata.yaml new file mode 100644 index 0000000..19bafb3 --- /dev/null +++ b/src/kind_lab_to_nwb/arc_ecephys_2024/metadata.yaml @@ -0,0 +1,20 @@ +NWBFile: + keywords: + - Keyword1 + - Keyword2 + - Keyword3 + related_publications: + https://doi.org/### or link to APA or MLA citation of the publication + session_description: + A rich text description of the experiment. Can also just be the abstract of the publication. + institution: Institution where the lab is located + lab: Kind + experimenter: + - Last, First Middle + - Last, First Middle +Subject: + species: Rattus norvegicus + description: A rich text description of the subject + age: TBD # in ISO 8601, such as "P1W2D" + sex: TBD # One of M, F, U, or O + date_of_birth: 2014-06-22 00:00:00-04:00 # Example diff --git a/src/kind_lab_to_nwb/arc_ecephys_2024/notes.md b/src/kind_lab_to_nwb/arc_ecephys_2024/notes.md new file mode 100644 index 0000000..0bd22b8 --- /dev/null +++ b/src/kind_lab_to_nwb/arc_ecephys_2024/notes.md @@ -0,0 +1 @@ +# Notes concerning the arc_ecephys_2024 conversion diff --git a/src/kind_lab_to_nwb/arc_ecephys_2024/nwbconverter.py b/src/kind_lab_to_nwb/arc_ecephys_2024/nwbconverter.py new file mode 100644 index 0000000..0414073 --- /dev/null +++ b/src/kind_lab_to_nwb/arc_ecephys_2024/nwbconverter.py @@ -0,0 +1,23 @@ +"""Primary NWBConverter class for this dataset.""" + +from neuroconv import ( + NWBConverter, +) +from neuroconv.datainterfaces import ( + PhySortingInterface, + SpikeGLXRecordingInterface, +) + +from .behaviorinterface import ( + ArcEcephys2024BehaviorInterface, +) + + +class ArcEcephys2024NWBConverter(NWBConverter): + """Primary conversion class for my extracellular electrophysiology dataset.""" + + data_interface_classes = dict( + Recording=SpikeGLXRecordingInterface, + Sorting=PhySortingInterface, + Behavior=ArcEcephys2024BehaviorInterface, + )