Feat 1 ephys compression #2

Merged 10 commits on Mar 18, 2024
52 changes: 0 additions & 52 deletions .github/workflows/init.yml

This file was deleted.

30 changes: 3 additions & 27 deletions .github/workflows/tag_and_publish.yml
@@ -3,9 +3,7 @@ on:
   push:
     branches:
       - main
-# Remove line 61 to enable automated semantic version bumps.
-# Change line 67 from "if: false" to "if: true" to enable PyPI publishing.
-# Requires that svc-aindscicomp be added as an admin to repo.
+
 jobs:
   update_badges:
     runs-on: ubuntu-latest
@@ -16,10 +14,10 @@ jobs:
           ref: ${{ env.DEFAULT_BRANCH }}
           fetch-depth: 0
           token: ${{ secrets.SERVICE_TOKEN }}
-      - name: Set up Python 3.8
+      - name: Set up Python 3.10
        uses: actions/setup-python@v3
         with:
-          python-version: 3.8
+          python-version: 3.10
       - name: Install dependencies
         run: |
           python -m pip install -e .[dev] --no-cache-dir
@@ -62,28 +60,6 @@ jobs:
           add: '["README.md"]'
   tag:
     needs: update_badges
-    if: ${{github.event.repository.name == 'aind-library-template'}}
     uses: AllenNeuralDynamics/aind-github-actions/.github/workflows/tag.yml@main
     secrets:
       SERVICE_TOKEN: ${{ secrets.SERVICE_TOKEN }}
-  publish:
-    needs: tag
-    if: false
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-      - name: Pull latest changes
-        run: git pull origin main
-      - name: Set up Python 3.8
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.8
-      - name: Install dependencies
-        run: |
-          pip install --upgrade setuptools wheel twine build
-          python -m build
-          twine check dist/*
-      - name: Publish on PyPI
-        uses: pypa/gh-action-pypi-publish@release/v1
-        with:
-          password: ${{ secrets.AIND_PYPI_TOKEN }}
2 changes: 1 addition & 1 deletion .github/workflows/test_and_lint.yml
@@ -10,7 +10,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [ '3.8', '3.9', '3.10' ]
+        python-version: [ '3.9', '3.10', '3.11' ]
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
68 changes: 45 additions & 23 deletions README.md
@@ -5,30 +5,62 @@
 [![semantic-release: angular](https://img.shields.io/badge/semantic--release-angular-e10079?logo=semantic-release)](https://github.com/semantic-release/semantic-release)
 ![Interrogate](https://img.shields.io/badge/interrogate-100.0%25-brightgreen)
 ![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen?logo=codecov)
-![Python](https://img.shields.io/badge/python->=3.7-blue?logo=python)
+![Python](https://img.shields.io/badge/python->=3.9-blue?logo=python)
 
+## Usage
+
+There are 4 main ways to run a data transformation job:
+- from a python script
+- from the command line passing in the settings as a json string
+- from the command line pointing to a config file
+- from the command line with env vars
+
-## Usage
-- To use this template, click the green `Use this template` button and `Create new repository`.
-- After github initially creates the new repository, please wait an extra minute for the initialization scripts to finish organizing the repo.
-- To enable the automatic semantic version increments: in the repository go to `Settings` and `Collaborators and teams`. Click the green `Add people` button. Add `svc-aindscicomp` as an admin. Modify the file in `.github/workflows/tag_and_publish.yml` and remove the if statement in line 10. The semantic version will now be incremented every time a code is committed into the main branch.
-- To publish to PyPI, enable semantic versioning and uncomment the publish block in `.github/workflows/tag_and_publish.yml`. The code will now be published to PyPI every time the code is committed into the main branch.
-- The `.github/workflows/test_and_lint.yml` file will run automated tests and style checks every time a Pull Request is opened. If the checks are undesired, the `test_and_lint.yml` can be deleted. The strictness of the code coverage level, etc., can be modified by altering the configurations in the `pyproject.toml` file and the `.flake8` file.
-
-## Installation
-To use the software, in the root directory, run
+Assuming `output_dir` exists:
+
+### From python
+```python
+from aind_data_transformation.ephys.ephys_job import EphysJobSettings, EphysCompressionJob
+from pathlib import Path
+
+input_source = Path("./tests/resources/v0.6.x_neuropixels_multiexp_multistream")
+output_dir = Path("output_dir")
+
+job_settings = EphysJobSettings(input_source=input_source, output_directory=output_dir)
+job = EphysCompressionJob(job_settings=job_settings)
+
+response = job.run_job()
+```
+
+### From the command line passing in settings as a json str
 ```bash
-pip install -e .
+python -m aind_data_transformation.ephys.ephys_job --job-settings '{"input_source":"./tests/resources/v0.6.x_neuropixels_multiexp_multistream","output_directory":"output_dir"}'
 ```
 
-To develop the code, run
+### From the command line passing in settings via a config file
 ```bash
-pip install -e .[dev]
+python -m aind_data_transformation.ephys.ephys_job --config-file configs.json
 ```
 
+### From the command line passing in settings via environment variables
+```bash
+export TRANSFORMATION_JOB_INPUT_SOURCE="./tests/resources/v0.6.x_neuropixels_multiexp_multistream"
+export TRANSFORMATION_JOB_OUTPUT_DIRECTORY="output_dir"
+python -m aind_data_transformation.ephys.ephys_job
+```
+
 
 ## Contributing
 
+The development dependencies can be installed with
+```bash
+pip install -e .[dev]
+```
+
+### Adding a new transformation job
+Any new job needs a settings class that inherits the BasicJobSettings class. This requires the fields input_source and output_directory and makes it so that the env vars have the TRANSFORMATION_JOB prefix.
+
+Any new job needs to inherit the GenericEtl class. This requires that the main public method to execute is called `run_job` and returns a JobResponse.
 
 ### Linters and testing
 
 There are several libraries used to run linters, check documentation, and run tests.
@@ -88,13 +120,3 @@ The table below, from [semantic release](https://github.com/semantic-release/sem
 | `feat(pencil): add 'graphiteWidth' option` | ~~Minor~~ Feature Release |
 | `perf(pencil): remove graphiteWidth option`<br><br>`BREAKING CHANGE: The graphiteWidth option has been removed.`<br>`The default graphite width of 10mm is always used for performance reasons.` | ~~Major~~ Breaking Release <br /> (Note that the `BREAKING CHANGE: ` token must be in the footer of the commit) |
 
-### Documentation
-To generate the rst files source files for documentation, run
-```bash
-sphinx-apidoc -o doc_template/source/ src
-```
-Then to create the documentation HTML files, run
-```bash
-sphinx-build -b html doc_template/source/ doc_template/build/html
-```
-More info on sphinx installation can be found [here](https://www.sphinx-doc.org/en/master/usage/installation.html).
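
Editor's note: the new "Adding a new transformation job" section above is terse, so here is a minimal sketch of what a custom job would look like against the `aind_data_transformation.core` module added in this PR. The `GzipJobSettings`/`GzipCompressionJob` names and the gzip logic are hypothetical illustrations, not code from this PR; only `BasicJobSettings`, `GenericEtl`, and `JobResponse` come from the new core module.

```python
"""Minimal sketch of a new transformation job, assuming this PR's core module.
GzipJobSettings, GzipCompressionJob, and the gzip logic are hypothetical."""
import gzip

from aind_data_transformation.core import (
    BasicJobSettings,
    GenericEtl,
    JobResponse,
)


class GzipJobSettings(BasicJobSettings):
    """Inherits the required input_source and output_directory fields,
    plus the TRANSFORMATION_JOB_ env-var prefix, from BasicJobSettings."""

    compress_level: int = 6  # hypothetical job-specific field


class GzipCompressionJob(GenericEtl[GzipJobSettings]):
    """Toy job that gzips every file under input_source."""

    def run_job(self) -> JobResponse:
        """The one public method GenericEtl requires."""
        out_dir = self.job_settings.output_directory
        out_dir.mkdir(parents=True, exist_ok=True)
        n_files = 0
        for src in self.job_settings.input_source.rglob("*"):
            if src.is_file():
                dest = out_dir / (src.name + ".gz")
                with open(src, "rb") as f_in:
                    with gzip.open(
                        dest,
                        "wb",
                        compresslevel=self.job_settings.compress_level,
                    ) as f_out:
                        f_out.write(f_in.read())
                n_files += 1
        return JobResponse(
            status_code=200, message=f"Compressed {n_files} files"
        )
```

Settings for such a job can then come from the constructor, a json string, a config file, or `TRANSFORMATION_JOB_*` env vars, exactly as in the Usage section above.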
7 changes: 5 additions & 2 deletions doc_template/source/conf.py
@@ -1,12 +1,15 @@
 """Configuration file for the Sphinx documentation builder."""
+
 #
 # For the full list of built-in configuration values, see the documentation:
 # https://www.sphinx-doc.org/en/master/usage/configuration.html
 
+from datetime import date
+
 # -- Path Setup --------------------------------------------------------------
-from os.path import dirname, abspath
+from os.path import abspath, dirname
 from pathlib import Path
-from datetime import date
 
 from aind_data_transformation import __version__ as package_version
 
 INSTITUTE_NAME = "Allen Institute for Neural Dynamics"
Expand Down
14 changes: 12 additions & 2 deletions pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 name = "aind-data-transformation"
 description = "Generated from aind-library-template"
 license = {text = "MIT"}
-requires-python = ">=3.7"
+requires-python = ">=3.9"
 authors = [
     {name = "Allen Institute for Neural Dynamics"}
 ]
@@ -17,10 +17,20 @@ readme = "README.md"
 dynamic = ["version"]
 
 dependencies = [
+    'pydantic>=2.0',
+    'pydantic-settings>=2.0',
 ]
 
 [project.optional-dependencies]
+ephys = [
+    'spikeinterface[full]>=0.100.0',
+    'probeinterface==0.2.21',
+    'wavpack-numcodecs>=0.1.3,<=0.1.5',
+    'zarr==2.16.1',
+    'numcodecs==0.11.0'
+]
 dev = [
+    'aind-data-transformation[ephys]',
     'black',
     'coverage',
     'flake8',
@@ -38,7 +48,7 @@ version = {attr = "aind_data_transformation.__version__"}
 
 [tool.black]
 line-length = 79
-target_version = ['py36']
+target_version = ['py39']
 exclude = '''
 
 (
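
Editor's note: with the optional-dependency groups added above, the heavy ephys stack becomes opt-in. A typical install, assuming a local checkout, mirrors the README's bash blocks:

```bash
# Core package only (pydantic, pydantic-settings)
pip install -e .

# Add the ephys compression stack (spikeinterface, zarr, wavpack-numcodecs, ...)
pip install -e .[ephys]

# Dev install; the 'aind-data-transformation[ephys]' pin means dev includes ephys
pip install -e .[dev]
```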
1 change: 1 addition & 0 deletions src/aind_data_transformation/__init__.py
@@ -1,2 +1,3 @@
 """Init package"""
+
 __version__ = "0.0.0"
99 changes: 99 additions & 0 deletions src/aind_data_transformation/core.py
@@ -0,0 +1,99 @@
"""Core abstract class that can be used as a template for etl jobs."""

import argparse
import json
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Generic, Optional, TypeVar

from pydantic import BaseModel, ConfigDict, Field
from pydantic_settings import BaseSettings, SettingsConfigDict


def get_parser() -> argparse.ArgumentParser:
"""
Get a standard parser that can be used to parse command line args
Returns
-------
argparse.ArgumentParser
"""
parser = argparse.ArgumentParser()
parser.add_argument(
"-j",
"--job-settings",
required=False,
type=str,
help=(
r"""
Instead of init args the job settings can optionally be passed in
as a json string in the command line.
"""
),
)
parser.add_argument(
"-c",
"--config-file",
required=False,
type=Path,
help=(
r"""
Instead of init args the job settings can optionally be loaded from
a config file.
"""
),
)
return parser


class BasicJobSettings(BaseSettings):
"""Model to define Transformation Job Configs"""

model_config = SettingsConfigDict(env_prefix="TRANSFORMATION_JOB_")
input_source: Path
output_directory: Path

@classmethod
def from_config_file(cls, config_file_location: Path):
"""
Utility method to create a class from a json file
Parameters
----------
config_file_location : Path
Location of json file to read.

"""
with open(config_file_location, "r") as f:
file_contents = json.load(f)
return cls.model_validate_json(json.dumps(file_contents))


_T = TypeVar("_T", bound=BasicJobSettings)


class JobResponse(BaseModel):
"""Standard model of a JobResponse."""

model_config = ConfigDict(extra="forbid")
status_code: int
message: Optional[str] = Field(None)
data: Optional[str] = Field(None)


class GenericEtl(ABC, Generic[_T]):
"""A generic etl class. Child classes will need to create a JobSettings
object that is json serializable. Child class will also need to implement
the run_job method, which returns a JobResponse object."""

def __init__(self, job_settings: _T):
"""
Class constructor for the GenericEtl class.
Parameters
----------
job_settings : _T
Generic type that is bound by the BaseSettings class.
"""
self.job_settings = job_settings

@abstractmethod
def run_job(self) -> JobResponse:
"""Abstract method that needs to be implemented by child classes."""
1 change: 1 addition & 0 deletions src/aind_data_transformation/ephys/__init__.py
@@ -0,0 +1 @@
"""Package to handle ephys compression job"""