[Cloud Deployment IV]: Simple neuroconv deployment #393

Closed
wants to merge 64 commits

64 commits
f96652b
added helper function
CodyCBakerPhD Mar 27, 2023
da3ee44
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 27, 2023
aa3619a
remake dockerfile; add dandi upload to YAML
CodyCBakerPhD Mar 29, 2023
1ae8034
debugged
CodyCBakerPhD Apr 2, 2023
8f50c80
Create aws_batch_deployment.rst
CodyCBakerPhD Apr 2, 2023
901f1e1
Delete dockerfile_neuroconv_with_rclone
CodyCBakerPhD Apr 2, 2023
d4ae252
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 2, 2023
5659b35
Merge branch 'main' into batch_helper
CodyCBakerPhD Apr 2, 2023
95ab319
Merge branch 'batch_helper' into simple_neuroconv_deployment
CodyCBakerPhD Apr 2, 2023
e822bc7
Merge branch 'main' into batch_helper
CodyCBakerPhD Apr 24, 2023
f1f7b9f
typos and formatting
bendichter Feb 18, 2024
53258c4
Merge branch 'batch_helper' into simple_neuroconv_deployment
bendichter Feb 18, 2024
9739320
resolve conflicts
Jul 15, 2024
9213391
add changelog
Jul 15, 2024
a476ba7
correct merge conflict and changelog + imports
Jul 15, 2024
4f6489d
format docstring
Jul 15, 2024
db51921
resolve conflicts
Jul 15, 2024
766185f
add changelog
Jul 15, 2024
9ae7ace
adjust changelog
Jul 15, 2024
c7fb810
split estimator to different PR
Jul 15, 2024
7fedcdd
expose extra options and add tests
Jul 15, 2024
f15cb68
Merge branch 'batch_helper' into simple_neuroconv_deployment
CodyCBakerPhD Jul 15, 2024
935f038
debug import
Jul 15, 2024
7e8ef72
fix bad conflict
Jul 15, 2024
f2be008
add boto3 to requirements
Jul 15, 2024
a4e7bf5
pass AWS credentials in function and actions
Jul 15, 2024
16ef3f6
Merge branch 'main' into batch_helper
CodyCBakerPhD Jul 22, 2024
4939c60
pass secrets
CodyCBakerPhD Jul 22, 2024
7c66c82
correct keyword name
CodyCBakerPhD Jul 22, 2024
b115adb
debug role fetching
CodyCBakerPhD Jul 22, 2024
dfcb148
fix syntax
CodyCBakerPhD Jul 22, 2024
57f65ce
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 22, 2024
38327f7
splinter out aws tests to reduce costs
CodyCBakerPhD Jul 22, 2024
90deef6
splinter out aws tests to reduce costs
CodyCBakerPhD Jul 22, 2024
0b6e429
temporarily disable
CodyCBakerPhD Jul 22, 2024
06e9bdb
fix suffix
CodyCBakerPhD Jul 22, 2024
fe16dde
limit matrix to reduce costs
CodyCBakerPhD Jul 22, 2024
7f40885
cancel previous
CodyCBakerPhD Jul 22, 2024
34328cf
remove iam role stuff; has to be set on user
CodyCBakerPhD Jul 22, 2024
17898f4
fix API call
CodyCBakerPhD Jul 22, 2024
de4e18f
update to modern standard; expose extra options; rename argument
CodyCBakerPhD Jul 22, 2024
47cc917
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 22, 2024
4eea2db
fix keyword argument in tests
CodyCBakerPhD Jul 22, 2024
4b22903
add status helper
CodyCBakerPhD Jul 22, 2024
e16551d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 22, 2024
29aa19b
debug
CodyCBakerPhD Jul 22, 2024
37223c9
enhance doc
CodyCBakerPhD Jul 22, 2024
1b4d88f
try not casting as strings
CodyCBakerPhD Jul 22, 2024
829e5f2
fix deserialization type
CodyCBakerPhD Jul 22, 2024
e76897f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 22, 2024
df8cb10
debug
CodyCBakerPhD Jul 22, 2024
67e8405
expose submission ID
CodyCBakerPhD Jul 22, 2024
297476f
fix datetime typing
CodyCBakerPhD Jul 22, 2024
2cfaf58
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 22, 2024
9ad5ef2
update test to new structure
CodyCBakerPhD Jul 22, 2024
4db6141
remove trigger
CodyCBakerPhD Jul 22, 2024
6949be0
restore trigger
CodyCBakerPhD Jul 22, 2024
c193c55
Merge branch 'batch_helper' into simple_neuroconv_deployment
CodyCBakerPhD Jul 22, 2024
c990ddf
Merge remote-tracking branch 'origin/simple_neuroconv_deployment' int…
Jul 22, 2024
26c5f69
resolve conflict
Jul 22, 2024
37d5be4
finish initial structure for deployment helper
CodyCBakerPhD Jul 24, 2024
9af5b99
separate base code; add new entrypoint; adjust dockerfiles; add EFS c…
CodyCBakerPhD Jul 25, 2024
4022b60
fix tests; make deletion safe
CodyCBakerPhD Jul 25, 2024
4491a7f
debugs
CodyCBakerPhD Jul 25, 2024
42 changes: 42 additions & 0 deletions .github/workflows/aws_tests.yml
@@ -0,0 +1,42 @@
name: AWS Tests
on:
schedule:
- cron: "0 16 * * 1" # Weekly at noon EST on Monday

concurrency: # Cancel previous workflows on the same pull request
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }}

jobs:
run:
name: ${{ matrix.os }} Python ${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
python-version: ["3.12"]
os: [ubuntu-latest]
steps:
- uses: actions/checkout@v4
- run: git fetch --prune --unshallow --tags
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Global Setup
run: |
python -m pip install -U pip # Official recommended way
git config --global user.email "[email protected]"
git config --global user.name "CI Almighty"

- name: Install full requirements
run: pip install .[aws,test]

- name: Run subset of tests that use AWS live services
run: pytest -rsx -n auto tests/test_minimal/test_tools/aws_tools.py
@@ -1,9 +1,8 @@
name: Build and Upload Docker Image of latest with YAML variable to GHCR
name: Build and Upload Docker Image of Current Dev Branch to GHCR

on:
workflow_run:
workflows: [build_and_upload_docker_image_latest_release]
types: [completed]
schedule:
- cron: "0 16 * * 1" # Weekly at noon EST on Monday
workflow_dispatch:

concurrency: # Cancel previous workflows on the same pull request
@@ -12,7 +11,7 @@ concurrency: # Cancel previous workflows on the same pull request

jobs:
release-image:
name: Build and Upload Docker Image of latest with YAML variable to GHCR
name: Build and Upload Docker Image of Current Dev Branch to GHCR
runs-on: ubuntu-latest
steps:
- name: Checkout
@@ -27,11 +26,16 @@ jobs:
registry: ghcr.io
username: ${{ secrets.DOCKER_UPLOADER_USERNAME }}
password: ${{ secrets.DOCKER_UPLOADER_PASSWORD }}
- name: Build and push YAML variable image based on latest
- name: Get current date
id: date
run: |
date_tag="$(date +'%Y-%m-%d')"
echo "date_tag=$date_tag" >> $GITHUB_OUTPUT
- name: Build and push
uses: docker/build-push-action@v5
with:
push: true # Push is a shorthand for --output=type=registry
tags: ghcr.io/catalystneuro/neuroconv:yaml_variable
tags: ghcr.io/catalystneuro/neuroconv:dev,ghcr.io/catalystneuro/neuroconv:${{ steps.date.outputs.date_tag }}
context: .
file: dockerfiles/neuroconv_latest_yaml_variable
file: dockerfiles/neuroconv_dev_for_ec2_deployment
provenance: false
@@ -17,6 +17,7 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Parse the version from the GitHub latest release tag
id: parsed_version
run: |
@@ -26,6 +27,7 @@
echo "version_tag=$version_tag" >> $GITHUB_OUTPUT
- name: Printout parsed version for GitHub Action log
run: echo ${{ steps.parsed_version.outputs.version_tag }}

- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
@@ -36,11 +38,12 @@
registry: ghcr.io
username: ${{ secrets.DOCKER_UPLOADER_USERNAME }}
password: ${{ secrets.DOCKER_UPLOADER_PASSWORD }}

- name: Build and push
uses: docker/build-push-action@v5
with:
push: true # Push is a shorthand for --output=type=registry
tags: ghcr.io/catalystneuro/neuroconv:latest,ghcr.io/catalystneuro/neuroconv:${{ steps.parsed_version.outputs.version_tag }}
context: .
file: dockerfiles/neuroconv_latest_release_dockerfile
file: dockerfiles/neuroconv_release_dockerfile
provenance: false
@@ -0,0 +1,48 @@
name: Build and Upload Docker Image of Latest Release for EC2 Deployment to GHCR

on:
schedule:
- cron: "0 16 * * 1" # Weekly at noon EST on Monday
Comment on lines +4 to +5 (Member Author): Set to trigger on github release

workflow_dispatch:

concurrency: # Cancel previous workflows on the same pull request
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
release-image:
name: Build and Upload Docker Image
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Parse the version from the GitHub latest release tag
id: parsed_version
run: |
git fetch --prune --unshallow --tags
tags="$(git tag --list)"
version_tag=${tags: -6 : 6}
echo "version_tag=$version_tag" >> $GITHUB_OUTPUT
- name: Printout parsed version for GitHub Action log
run: echo ${{ steps.parsed_version.outputs.version_tag }}

- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ secrets.DOCKER_UPLOADER_USERNAME }}
password: ${{ secrets.DOCKER_UPLOADER_PASSWORD }}

- name: Build and push
uses: docker/build-push-action@v5
with:
push: true # Push is a shorthand for --output=type=registry
tags: ghcr.io/catalystneuro/neuroconv_for_ec2_deployment:dev
context: .
file: dockerfiles/neuroconv_release_for_ec2_deployment
provenance: false
@@ -26,6 +26,7 @@ jobs:
registry: ghcr.io
username: ${{ secrets.DOCKER_UPLOADER_USERNAME }}
password: ${{ secrets.DOCKER_UPLOADER_PASSWORD }}

- name: Build and push
uses: docker/build-push-action@v5
with:
2 changes: 0 additions & 2 deletions .github/workflows/live-service-testing.yml
@@ -36,8 +36,6 @@ jobs:
- name: Install full requirements
run: pip install .[test,full]

- name: Run subset of tests that use S3 live services
run: pytest -rsx -n auto tests/test_minimal/test_tools/s3_tools.py
- name: Run subset of tests that use DANDI live services
run: pytest -rsx -n auto tests/test_minimal/test_tools/dandi_transfer_tools.py
- name: Run subset of tests that use Globus live services
6 changes: 5 additions & 1 deletion CHANGELOG.md
@@ -1,5 +1,10 @@
# Upcoming

### Features
* Added MedPCInterface for operant behavioral output files. [PR #883](https://github.com/catalystneuro/neuroconv/pull/883)
* Added helper function `neuroconv.tools.data_transfers.submit_aws_batch_job` for basic automated submission of AWS batch jobs. [PR #384](https://github.com/catalystneuro/neuroconv/pull/384)



## v0.5.0 (July 17, 2024)

@@ -12,7 +17,6 @@

### Features
* Added docker image and tests for an automated Rclone configuration (with file stream passed via an environment variable). [PR #902](https://github.com/catalystneuro/neuroconv/pull/902)
* Added MedPCInterface for operant behavioral output files. [PR #883](https://github.com/catalystneuro/neuroconv/pull/883)

### Bug fixes
* Fixed the conversion option schema of a `SpikeGLXConverter` when used inside another `NWBConverter`. [PR #922](https://github.com/catalystneuro/neuroconv/pull/922)
6 changes: 6 additions & 0 deletions dockerfiles/neuroconv_dev_for_ec2_deployment
@@ -0,0 +1,6 @@
FROM python:3.11.7-slim
LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv
LABEL org.opencontainers.image.description="A docker image extending the dev branch of the NeuroConv package with modifications related to deployment on EC2 Batch."
ADD ./ neuroconv
RUN cd neuroconv && pip install .[full]
CMD printf "$NEUROCONV_YAML" > run.yml && python -m neuroconv_ec2 run.yml --data-folder-path "$NEUROCONV_DATA_PATH" --output-folder-path "$NEUROCONV_OUTPUT_PATH" --overwrite --upload-to-dandiset-id "$DANDISET_ID" --update-tracking-table "$TRACKING_TABLE" --tracking-table-submission-id "$SUBMISSION_ID" --efs-volume-name-to-cleanup "$EFS_VOLUME"
4 changes: 0 additions & 4 deletions dockerfiles/neuroconv_latest_yaml_variable

This file was deleted.

@@ -1,6 +1,6 @@
FROM python:3.11.7-slim
LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv
LABEL org.opencontainers.image.description="A docker image for the most recent official release of the NeuroConv package."
LABEL org.opencontainers.image.description="A docker image for an official release of the full NeuroConv package."
RUN apt update && apt install musl-dev python3-dev -y
RUN pip install "neuroconv[full]"
CMD ["python -m"]
4 changes: 4 additions & 0 deletions dockerfiles/neuroconv_release_for_ec2_deployment
@@ -0,0 +1,4 @@
FROM ghcr.io/catalystneuro/neuroconv:latest
LABEL org.opencontainers.image.source=https://github.com/catalystneuro/neuroconv
LABEL org.opencontainers.image.description="A docker image extending the official release of the NeuroConv package with modifications related to deployment on EC2 Batch."
CMD printf "$NEUROCONV_YAML" > run.yml && python -m neuroconv_ec2 run.yml --data-folder-path "$NEUROCONV_DATA_PATH" --output-folder-path "$NEUROCONV_OUTPUT_PATH" --overwrite --upload-to-dandiset-id "$DANDISET_ID" --update-tracking-table "$TRACKING_TABLE" --tracking-table-submission-id "$SUBMISSION_ID" --efs-volume-name-to-cleanup "$EFS_VOLUME"
168 changes: 168 additions & 0 deletions docs/developer_guide/aws_batch_deployment.rst
@@ -0,0 +1,168 @@
One way of deploying jobs on AWS Batch is to manually set up the entire workflow through the AWS web UI and to manually submit each job in that manner.

Deploying hundreds of jobs this way would be cumbersome.

Here are two other methods that allow simpler deployment using `boto3`.


Semi-automated Deployment of NeuroConv on AWS Batch
---------------------------------------------------

Step 1: Transfer data to Elastic File System (EFS)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The nice thing about using EFS is that we are only ever billed for the actual amount of disk storage used over time, and do not need to specify a fixed allocation or scaling strategy.

It is also relatively easy to mount across multiple AWS Batch jobs simultaneously.

Unfortunately, the one downside is that its pricing per GB-month is significantly higher than that of either S3 or EBS.

To easily transfer data from a Google Drive (or theoretically any backend supported by `rclone`), set the following environment variables for rclone credentials: `DRIVE_NAME`, `TOKEN`, `REFRESH_TOKEN`, and `EXPIRY`.

.. note::

    I eventually hope to be able to read and pass these directly from a local `rclone.conf` file, but for now they must be set by hand (a sketch of how that could work is shown at the end of this step).

.. note::

    All path references must point to `/mnt/data/` as the base in order to persist across jobs.

.. code-block:: python

    import os
    from datetime import datetime

    from neuroconv.tools.data_transfers import submit_aws_batch_job

    job_name = "<unique job name>"
    docker_container = "ghcr.io/catalystneuro/rclone_auto_config:latest"
    efs_name = "<your EFS volume name>"

    log_datetime = str(datetime.now()).replace(" ", ":")  # no spaces in CLI
    RCLONE_COMMAND = f"{os.environ['RCLONE_COMMAND']} -v --config /mnt/data/rclone.conf --log-file /mnt/data/submit-{log_datetime}.txt"

    environment_variables = [
        dict(name="DRIVE_NAME", value=os.environ["DRIVE_NAME"]),
        dict(name="TOKEN", value=os.environ["TOKEN"]),
        dict(name="REFRESH_TOKEN", value=os.environ["REFRESH_TOKEN"]),
        dict(name="EXPIRY", value=os.environ["EXPIRY"]),
        dict(name="RCLONE_COMMAND", value=RCLONE_COMMAND),
    ]

    submit_aws_batch_job(
        job_name=job_name,
        docker_container=docker_container,
        efs_name=efs_name,
        environment_variables=environment_variables,
    )


An example `RCLONE_COMMAND` for a drive named 'MyDrive' and the GIN testing data stored under `/ephy_testing_data/spikeglx/Noise4Sam_g0/` of that drive would be

.. code-block:: python

    RCLONE_COMMAND = "sync MyDrive:/ephy_testing_data/spikeglx/Noise4Sam_g0 /mnt/data/Noise4Sam_g0"
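
As mentioned in the note above, these credential values could instead be pulled out of a local `rclone.conf`. A minimal sketch of that (assuming a Google Drive remote, for which rclone stores the `token` entry as a JSON blob containing `access_token`, `refresh_token`, and `expiry` fields; the config path is a placeholder):

.. code-block:: python

    import configparser
    import json

    config = configparser.ConfigParser()
    config.read("/path/to/rclone.conf")  # placeholder path to your local rclone config

    drive_name = config.sections()[0]  # e.g., "MyDrive"; select whichever remote you intend to sync from
    token_info = json.loads(config[drive_name]["token"])

    environment_variables = [
        dict(name="DRIVE_NAME", value=drive_name),
        dict(name="TOKEN", value=token_info["access_token"]),
        dict(name="REFRESH_TOKEN", value=token_info["refresh_token"]),
        dict(name="EXPIRY", value=token_info["expiry"]),
        # RCLONE_COMMAND would still be appended as in the submission script above
    ]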


Step 2: Run the YAML Conversion Specification
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Continuing the example above, if we have the YAML file `test_batch.yml`

.. code-block:: yaml

    metadata:
      NWBFile:
        lab: My Lab
        institution: My Institution

    conversion_options:
      stub_test: True

    data_interfaces:
      ap: SpikeGLXRecordingInterface
      lf: SpikeGLXRecordingInterface

    experiments:
      ymaze:
        metadata:
          NWBFile:
            session_description: Testing batch deployment.

        sessions:
          - nwbfile_name: /mnt/data/test_batch_deployment.nwb
            source_data:
              ap:
                file_path: /mnt/data/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.ap.bin
              lf:
                file_path: /mnt/data/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.lf.bin
            metadata:
              NWBFile:
                session_id: test_batch_deployment
              Subject:
                subject_id: "1"
                sex: F
                age: P35D
                species: Mus musculus

then we can run the following stand-alone script to deploy the conversion after confirming Step 1 completed successfully.

.. code-block:: python

    from neuroconv.tools.data_transfers import submit_aws_batch_job

    job_name = "<unique job name>"
    docker_container = "ghcr.io/catalystneuro/neuroconv:dev_auto_yaml"
    efs_name = "<name of EFS>"

    yaml_file_path = "/path/to/test_batch.yml"

    with open(file=yaml_file_path) as file:
        # Swap double quotes for single so the stream passes safely through the CLI
        YAML_STREAM = file.read().replace('"', "'")

    environment_variables = [dict(name="YAML_STREAM", value=YAML_STREAM)]

    submit_aws_batch_job(
        job_name=job_name,
        docker_container=docker_container,
        efs_name=efs_name,
        environment_variables=environment_variables,
    )
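
After submission, the job can be monitored until it completes. A minimal sketch using `boto3` directly (the job ID is a placeholder; take it from the value returned at submission or from the AWS Batch console, and adjust the region to match your Batch setup):

.. code-block:: python

    import boto3

    batch_client = boto3.client("batch", region_name="us-east-2")

    job_id = "<job ID returned at submission>"
    response = batch_client.describe_jobs(jobs=[job_id])

    # Status progresses through SUBMITTED, PENDING, RUNNABLE, STARTING, RUNNING,
    # and ends at either SUCCEEDED or FAILED
    print(response["jobs"][0]["status"])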


Step 3: Ensure File Cleanup
~~~~~~~~~~~~~~~~~~~~~~~~~~~

TODO: write a dockerfile to perform this step with the API

It's a good idea to confirm that you have access to your EFS from on-demand resources in case you ever need to go in and perform a manual cleanup operation.

Boot up an EC2 t2.micro instance using the Amazon Linux 2 image with minimal resources.

Create two new security groups: `EFS Target` (no policies set) and `EFS Mount` (inbound policy set to NFS with `EFS Target` as the source).

On the EC2 instance, change the security group to `EFS Target`. In the EFS network settings, add the `EFS Mount` group.

Connect to the EC2 instance and run

.. code-block:: bash

    mkdir ~/efs-mount-point  # or any other name you want; I do recommend keeping this in the home directory (~) for ease of access though
    sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport fs-<efs number>.efs.us-east-2.amazonaws.com:/ ~/efs-mount-point
    # Note that any operations performed on contents of the mounted volume must utilize sudo

and it *should* work, though this step is known to have various issues; if you did everything exactly as illustrated above, it should work (at least it did as of 4/2/2023).

You can now read, write, and importantly delete any contents on the EFS.

Until automated DANDI upload is implemented in the YAML functionality, you will need to use this method to manually remove the NWB file.

Even after that is implemented, you should double-check that the `cleanup=True` flag to that function executed properly.
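
Until that dockerfile exists, the cleanup can be sketched through the EFS API with `boto3` (the file system ID is a placeholder; note that all mount targets must be deleted before the file system itself, and the final deletion may need to be retried while mount targets are still being torn down):

.. code-block:: python

    import boto3

    efs_client = boto3.client("efs", region_name="us-east-2")

    file_system_id = "<your fs-... ID>"

    # Remove all mount targets first; deleting the file system fails while any remain
    mount_targets = efs_client.describe_mount_targets(FileSystemId=file_system_id)
    for mount_target in mount_targets["MountTargets"]:
        efs_client.delete_mount_target(MountTargetId=mount_target["MountTargetId"])

    # May need to wait for the mount targets to finish deleting before this succeeds
    efs_client.delete_file_system(FileSystemId=file_system_id)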



Fully Automated Deployment of NeuroConv on AWS Batch
----------------------------------------------------

Coming soon...

The approach is essentially the same as the semi-automated one; all jobs are simply submitted at the same time, with each job dependent on the completion of the one before it, as sketched below.
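
A minimal sketch of that chaining with `boto3` directly (the queue and job definition names are placeholders; the `dependsOn` parameter of `submit_job` makes AWS Batch hold each job until the job it depends on succeeds):

.. code-block:: python

    import boto3

    batch_client = boto3.client("batch", region_name="us-east-2")

    # Placeholder shared settings for illustration
    common = dict(jobQueue="<your job queue>", jobDefinition="<your job definition>")

    transfer = batch_client.submit_job(jobName="transfer-data-to-efs", **common)
    convert = batch_client.submit_job(
        jobName="run-yaml-conversion",
        dependsOn=[dict(jobId=transfer["jobId"])],
        **common,
    )
    cleanup = batch_client.submit_job(
        jobName="cleanup-efs-volume",
        dependsOn=[dict(jobId=convert["jobId"])],
        **common,
    )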