[Cloud Deployment I]: Main dockerfile (#383)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ben Dichter <[email protected]>
catalystneuro · Apr 8, 2024 · 70d6ce0 · 70d6ce0
1 parent 468157b
commit 70d6ce0
Show file tree

Hide file tree

Showing 14 changed files with 581 additions and 6 deletions.
diff --git a/.github/workflows/build_and_upload_docker_image_dev.yml b/.github/workflows/build_and_upload_docker_image_dev.yml
@@ -0,0 +1,41 @@
+name: Build and Upload Docker Image of Current Dev Branch to GHCR
+
+on:
+  schedule:
+    - cron: "0 16 * * 1"  # Mondays at 16:00 UTC (noon EDT / 11:00 EST)
+  workflow_dispatch:  # Also allow manual runs
+
+concurrency:  # Cancel an in-progress run of this workflow on the same ref when a newer one starts
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  release-image:
+    name: Build and Upload Docker Image of Current Dev Branch to GHCR
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ secrets.DOCKER_UPLOADER_USERNAME }}
+          password: ${{ secrets.DOCKER_UPLOADER_PASSWORD }}
+      - name: Get current date
+        id: date  # Exposes steps.date.outputs.date_tag (YYYY-MM-DD) for the image tag below
+        run: |
+          date_tag="$(date +'%Y-%m-%d')"
+          echo "date_tag=$date_tag" >> "$GITHUB_OUTPUT"
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          push: true  # Push is a shorthand for --output=type=registry
+          tags: ghcr.io/catalystneuro/neuroconv:dev,ghcr.io/catalystneuro/neuroconv:${{ steps.date.outputs.date_tag }}
+          context: .  # Repository root; the dev dockerfile copies the whole checkout into the image
+          file: dockerfiles/neuroconv_dev_dockerfile
+          provenance: false
diff --git a/.github/workflows/build_and_upload_docker_image_latest_release.yml b/.github/workflows/build_and_upload_docker_image_latest_release.yml
@@ -0,0 +1,46 @@
+name: Build and Upload Docker Image of Latest Release to GHCR
+
+on:
+  workflow_run:
+    workflows: [auto-publish]
+    types: [completed]
+  workflow_dispatch:  # Also allow manual runs
+
+concurrency:  # Cancel an in-progress run of this workflow on the same ref when a newer one starts
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  release-image:
+    name: Build and Upload Docker Image of Latest Release to GHCR
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Parse the version from the GitHub latest release tag
+        id: parsed_version  # Exposes steps.parsed_version.outputs.version_tag for the image tag below
+        run: |
+          git fetch --prune --unshallow --tags
+          # Highest tag by version order; unlike a fixed 6-character slice this also handles tags such as v0.4.10
+          version_tag="$(git tag --list --sort=-version:refname | head -n 1)"
+          echo "version_tag=$version_tag" >> "$GITHUB_OUTPUT"
+      - name: Printout parsed version for GitHub Action log
+        run: echo ${{ steps.parsed_version.outputs.version_tag }}
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ secrets.DOCKER_UPLOADER_USERNAME }}
+          password: ${{ secrets.DOCKER_UPLOADER_PASSWORD }}
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          push: true  # Push is a shorthand for --output=type=registry
+          tags: ghcr.io/catalystneuro/neuroconv:latest,ghcr.io/catalystneuro/neuroconv:${{ steps.parsed_version.outputs.version_tag }}
+          context: .
+          file: dockerfiles/neuroconv_latest_release_dockerfile
+          provenance: false
diff --git a/.github/workflows/build_and_upload_docker_image_yaml_variable.yml b/.github/workflows/build_and_upload_docker_image_yaml_variable.yml
@@ -0,0 +1,37 @@
+name: Build and Upload Docker Image of latest with YAML variable to GHCR
+
+on:
+  workflow_run:
+    workflows: ["Build and Upload Docker Image of Latest Release to GHCR"]  # Matched against the upstream workflow's `name:`, not its file name
+    types: [completed]
+  workflow_dispatch:  # Also allow manual runs
+
+concurrency:  # Cancel an in-progress run of this workflow on the same ref when a newer one starts
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  release-image:
+    name: Build and Upload Docker Image of latest with YAML variable to GHCR
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ secrets.DOCKER_UPLOADER_USERNAME }}
+          password: ${{ secrets.DOCKER_UPLOADER_PASSWORD }}
+      - name: Build and push YAML variable image based on latest
+        uses: docker/build-push-action@v5
+        with:
+          push: true  # Push is a shorthand for --output=type=registry
+          tags: ghcr.io/catalystneuro/neuroconv:yaml_variable
+          context: .
+          file: dockerfiles/neuroconv_latest_yaml_variable  # Builds FROM the published :latest image
+          provenance: false
diff --git a/.github/workflows/docker_testing.yml b/.github/workflows/docker_testing.yml
@@ -0,0 +1,82 @@
+name: Docker CLI tests
+on:
+  schedule:
+    - cron: "0 16 * * *"  # Daily at 16:00 UTC (noon EDT / 11:00 EST)
+  workflow_dispatch:
+
+jobs:
+  run:
+    name: ${{ matrix.os }} Python ${{ matrix.python-version }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.12"]
+        os: [ubuntu-latest] #, macos-latest, windows-latest]  # Seems docker might only be available for ubuntu on GitHub Actions
+    steps:
+      - uses: actions/checkout@v4
+      - run: git fetch --prune --unshallow --tags
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Global Setup
+        run: python -m pip install -U pip  # Official recommended way
+
+      - name: Install pytest and neuroconv minimal
+        run: |
+          pip install pytest
+          pip install .
+
+      - name: Get ephy_testing_data current head hash
+        id: ephys  # The remote HEAD hash is part of the cache key, so a new upstream commit invalidates the cache
+        run: echo "HASH_EPHY_DATASET=$(git ls-remote https://gin.g-node.org/NeuralEnsemble/ephy_testing_data.git HEAD | cut -f1)" >> "$GITHUB_OUTPUT"
+      - name: Cache ephys dataset - ${{ steps.ephys.outputs.HASH_EPHY_DATASET }}
+        uses: actions/cache@v4
+        id: cache-ephys-datasets
+        with:
+          path: ./ephy_testing_data
+          key: ephys-datasets-2023-06-26-${{ matrix.os }}-${{ steps.ephys.outputs.HASH_EPHY_DATASET }}
+      - name: Get ophys_testing_data current head hash
+        id: ophys
+        run: echo "HASH_OPHYS_DATASET=$(git ls-remote https://gin.g-node.org/CatalystNeuro/ophys_testing_data.git HEAD | cut -f1)" >> "$GITHUB_OUTPUT"
+      - name: Cache ophys dataset - ${{ steps.ophys.outputs.HASH_OPHYS_DATASET }}
+        uses: actions/cache@v4
+        id: cache-ophys-datasets
+        with:
+          path: ./ophys_testing_data
+          key: ophys-datasets-2022-08-18-${{ matrix.os }}-${{ steps.ophys.outputs.HASH_OPHYS_DATASET }}
+      - name: Get behavior_testing_data current head hash
+        id: behavior
+        run: echo "HASH_BEHAVIOR_DATASET=$(git ls-remote https://gin.g-node.org/CatalystNeuro/behavior_testing_data.git HEAD | cut -f1)" >> "$GITHUB_OUTPUT"
+      - name: Cache behavior dataset - ${{ steps.behavior.outputs.HASH_BEHAVIOR_DATASET }}
+        uses: actions/cache@v4
+        id: cache-behavior-datasets
+        with:
+          path: ./behavior_testing_data
+          key: behavior-datasets-2023-07-26-${{ matrix.os }}-${{ steps.behavior.outputs.HASH_BEHAVIOR_DATASET }}
+
+      - if: steps.cache-ephys-datasets.outputs.cache-hit != 'true' || steps.cache-ophys-datasets.outputs.cache-hit != 'true' || steps.cache-behavior-datasets.outputs.cache-hit != 'true'
+        name: Install and configure AWS CLI
+        run: |
+          pip install awscli
+          aws configure set aws_access_key_id ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws configure set aws_secret_access_key ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+      - if: steps.cache-ephys-datasets.outputs.cache-hit != 'true'
+        name: Download ephys dataset from S3
+        run: aws s3 cp --recursive ${{ secrets.S3_GIN_BUCKET }}/ephy_testing_data ./ephy_testing_data
+      - if: steps.cache-ophys-datasets.outputs.cache-hit != 'true'
+        name: Download ophys dataset from S3
+        run: aws s3 cp --recursive ${{ secrets.S3_GIN_BUCKET }}/ophys_testing_data ./ophys_testing_data
+      - if: steps.cache-behavior-datasets.outputs.cache-hit != 'true'
+        name: Download behavior dataset from S3
+        run: aws s3 cp --recursive ${{ secrets.S3_GIN_BUCKET }}/behavior_testing_data ./behavior_testing_data
+
+      - name: Pull docker image
+        run: |
+          docker pull ghcr.io/catalystneuro/neuroconv:latest
+          docker pull ghcr.io/catalystneuro/neuroconv:yaml_variable
+
+      - name: Run docker tests
+        run: pytest tests/docker_yaml_conversion_specification_cli.py -vv -rsx
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,12 +1,17 @@
 # Upcoming
 
-### Bug fixes
-* Fixed writing waveforms directly to file [PR #799](https://github.com/catalystneuro/neuroconv/pull/799)
-
 ### Deprecations
 * Removed `stream_id` as an argument from `IntanRecordingInterface` [PR #794](https://github.com/catalystneuro/neuroconv/pull/794)
 * Replaced `waveform_extractor.is_extension` with `waveform_extractor.has_extension`[PR #799](https://github.com/catalystneuro/neuroconv/pull/799)
 
+### Features
+* Released the first official Docker images for the package on the GitHub Container Repository (GHCR). [PR #383](https://github.com/catalystneuro/neuroconv/pull/383)
+
+### Bug fixes
+* Fixed writing waveforms directly to file [PR #799](https://github.com/catalystneuro/neuroconv/pull/799)
+
+
+
 # v0.4.8 (March 20, 2024)
 
 ### Bug fixes

diff --git a/dockerfiles/neuroconv_dev_dockerfile b/dockerfiles/neuroconv_dev_dockerfile
@@ -0,0 +1,6 @@
+FROM python:3.11.7-slim
+LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv
+LABEL org.opencontainers.image.description="A docker image for the most recent daily build of the main branch."
+COPY ./ neuroconv
+RUN cd neuroconv && pip install ".[full]"
+CMD ["neuroconv", "--help"]
diff --git a/dockerfiles/neuroconv_latest_release_dockerfile b/dockerfiles/neuroconv_latest_release_dockerfile
@@ -0,0 +1,6 @@
+FROM python:3.11.7-slim
+LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv
+LABEL org.opencontainers.image.description="A docker image for the most recent official release of the NeuroConv package."
+RUN apt-get update && apt-get install -y musl-dev python3-dev && rm -rf /var/lib/apt/lists/*
+RUN pip install "neuroconv[full]"
+CMD ["neuroconv", "--help"]
diff --git a/dockerfiles/neuroconv_latest_yaml_variable b/dockerfiles/neuroconv_latest_yaml_variable
@@ -0,0 +1,4 @@
+FROM ghcr.io/catalystneuro/neuroconv:latest
+LABEL org.opencontainers.image.source="https://github.com/catalystneuro/neuroconv" \
+      org.opencontainers.image.description="A docker image for the most recent official release of the NeuroConv package. Modified to take in environment variables for the YAML conversion specification and other command line arguments."
+CMD echo "$NEUROCONV_YAML" > run.yml && python -m neuroconv run.yml --data-folder-path "$NEUROCONV_DATA_PATH" --output-folder-path "$NEUROCONV_OUTPUT_PATH" --overwrite
diff --git a/docs/developer_guide.rst b/docs/developer_guide.rst
@@ -15,5 +15,6 @@ The most common contribution is for a user/developer to help us integrate a new
     Testing Suite <developer_guide/testing_suite>
     Coding Style <developer_guide/style_guide>
     Building the Documentation <developer_guide/building_documentation>
+    Building the Docker Image <developer_guide/docker_images>
 
 Or feel free to raise a `bug report <https://github.com/catalystneuro/neuroconv/issues/new?assignees=&labels=bug&template=bug_report.yml&title=%5BBug%5D%3A+>`_ or `feature request <https://github.com/catalystneuro/neuroconv/issues/new?assignees=&labels=enhancement&template=feature_request.yml&title=%5BFeature%5D%3A+>`_ for our maintainers to prioritize!
diff --git a/docs/developer_guide/docker_images.rst b/docs/developer_guide/docker_images.rst
@@ -0,0 +1,96 @@
+Manually Build Docker Images
+----------------------------
+
+.. note::
+
+    It is recommended to build the docker image on the same system architecture that you intend to run it on, *i.e.*, AWS Linux AMI 64-bit (x86), as it may experience difficulties running on other significantly different systems (like an M1 Mac).
+
+.. note::
+
+    The NeuroConv docker container comes prepackaged with all required installations, equivalent to running ``pip install neuroconv[full]``. As such it is relatively heavy, so be sure that whatever environment you intend to use it in (such as in continuous integration) has sufficient disk space.
+
+
+Latest Release
+~~~~~~~~~~~~~~
+
+To manually build the most recent release, navigate to the ``neuroconv/dockerfiles`` folder and run...
+
+.. code::
+
+    docker build -f neuroconv_latest_release_dockerfile -t neuroconv_latest_release .
+
+
+Dev Branch
+~~~~~~~~~~
+
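+Checkout to a specific branch on a local clone, then, from the root of the repository (the dev image installs the package from the build context, so the context must be the repository itself), run...
+
+.. code::
+
+    docker build -f dockerfiles/neuroconv_dev_dockerfile -t neuroconv_dev .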
+
+
+
+Publish Container to GitHub
+---------------------------
+
+The ``LABEL`` is important to include as it determines the host repository on the GitHub Container Registry (GHCR). In each dockerfile we wish to publish on the GHCR, we will add this label right after the ``FROM`` clause...
+
+.. code::
+
+    FROM PARENT_IMAGE:TAG
+    LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv
+
+After building the image itself, we can publish the container with...
+
+.. code::
+
+    docker tag IMAGE_NAME ghcr.io/catalystneuro/IMAGE_NAME:TAG
+    export CR_PAT="<YOUR GITHUB SECRET TOKEN>"
+    echo $CR_PAT | docker login ghcr.io -u <YOUR GITHUB USERNAME> --password-stdin
+    docker push ghcr.io/catalystneuro/IMAGE_NAME:TAG
+
+.. note::
+
+    Though it may appear confusing, the use of the ``IMAGE_NAME`` in these steps determines only the *name* of the package as available from the 'packages' screen of the host repository; the ``LABEL`` itself ensures the upload and linkage to the NeuroConv GHCR.
+
+
+
+Run Docker container on local YAML conversion specification file
+----------------------------------------------------------------
+
+You can either perform a manual build locally following the instructions above, or pull the container from the GitHub Container Registry (GHCR) with...
+
+.. code::
+
+    docker pull ghcr.io/catalystneuro/neuroconv:latest
+
+and can then run the entrypoint (equivalent to the usual command line usage) on a YAML specification file (named ``your_specification_file.yml``) with...
+
+.. code::
+
+    docker run -it --volume /your/local/volume/:/desired/alias/of/volume/ ghcr.io/catalystneuro/neuroconv:latest neuroconv /desired/alias/of/volume/your_specification_file.yml
+
+
+
+Run Docker container on YAML conversion specification environment variable
+--------------------------------------------------------------------------
+
+An alternative approach that simplifies usage on systems such as AWS Batch is to specify the YAML contents as an environment variable. The YAML file is constructed in the first step of the container launch.
+
+The only potential downside with this usage is the maximum size of an environment variable (~13,000 characters). Typical YAML specification files should not come remotely close to this limit.
+
+Otherwise, in any cloud deployment, the YAML file transfer will have to be managed separately, likely as a part of the data transfer or an entirely separate step.
+
+To use this alternative image on a local environment, you no longer need to invoke the ``neuroconv`` entrypoint pointing to a file. Instead, just set the environment variables and run the docker container on the mounted volume...
+
+.. code::
+
+    export NEUROCONV_YAML="<copy and paste contents of YAML file (manually replace instances of double quotes with single quotes)>"
+    export NEUROCONV_DATA_PATH="/desired/alias/of/volume/"
+    export NEUROCONV_OUTPUT_PATH="/desired/alias/of/volume/"
+    docker run -it --env NEUROCONV_YAML --env NEUROCONV_DATA_PATH --env NEUROCONV_OUTPUT_PATH --volume /your/local/volume/:/desired/alias/of/volume/ ghcr.io/catalystneuro/neuroconv:yaml_variable
+
+.. note::
+
+    On Windows, use ``set`` instead of ``export``.
diff --git a/docs/developer_guide/testing_suite.rst b/docs/developer_guide/testing_suite.rst
@@ -61,6 +61,7 @@ These can be run in isolation using ``pip install -e neuroconv[test,<modality>]`
 ``pytest tests/test_<modality>`` where ``<modality>`` can be any of ``ophys``, ``ecephys``, ``text``, or ``behavior``.
 
 
+.. _example_data:
 
 Testing On Example Data
 -----------------------
-Original file line number
+Diff line change
@@ Expand Up @@
     ``pytest tests/test_<modality>`` where ``<modality>`` can be any of ``ophys``, ``ecephys``, ``text``, or ``behavior``.
+    .. _example_data:
     Testing On Example Data
     -----------------------
@@ Expand Down @@