From f73bbee23b8f2258b1a72037f91e04449d64fb7e Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin
Date: Mon, 23 Sep 2024 13:35:20 -0600
Subject: [PATCH] Use a github action to centralize gin data preparation (#1095)

---
Notes (this area between the three-dash line and the first diff is not part
of the commit):

* Adds a composite action, .github/actions/load-data, that looks up the HEAD
  hash of each GIN testing-data repository, uses it as an actions/cache key
  for ./ephy_testing_data, ./ophys_testing_data and ./behavior_testing_data,
  and copies every dataset that missed the cache from the S3 bucket.
* dev-testing.yml, doctests.yml and testing.yml now call that action instead
  of repeating the steps inline. testing.yml additionally moves the
  "Install full requirements" step after the data preparation step.
* The inline "::set-output" commands are replaced by appends to
  $GITHUB_OUTPUT, and the per-dataset date stamps are dropped from the cache
  keys.
* NOTE(review): line breaks, indentation and blank context lines of this
  patch were reconstructed from the hunk headers and the diffstat. Run
  "git apply --check" against the target tree before relying on it.
* NOTE(review): the new action expands "inputs.*" expressions directly inside
  "run:" scripts. Passing them through "env:" and reading shell variables is
  the more defensive form; left unchanged here so the patch still matches the
  recorded commit.

 .github/actions/load-data/action.yml | 97 ++++++++++++++++++++++++++++
 .github/workflows/dev-testing.yml    | 51 ++-------------
 .github/workflows/doctests.yml       | 33 ++--------
 .github/workflows/testing.yml        | 54 +++-------------
 CHANGELOG.md                         |  2 +-
 5 files changed, 118 insertions(+), 119 deletions(-)
 create mode 100644 .github/actions/load-data/action.yml

diff --git a/.github/actions/load-data/action.yml b/.github/actions/load-data/action.yml
new file mode 100644
index 000000000..77dd3ce93
--- /dev/null
+++ b/.github/actions/load-data/action.yml
@@ -0,0 +1,97 @@
+name: 'Prepare Datasets'
+description: 'Restores data from caches or downloads it from S3.'
+inputs:
+  aws-access-key-id:
+    description: 'AWS Access Key ID'
+    required: true
+  aws-secret-access-key:
+    description: 'AWS Secret Access Key'
+    required: true
+  s3-gin-bucket:
+    description: 'S3 GIN Bucket URL'
+    required: true
+  os:
+    description: 'Operating system'
+    required: true
+runs:
+  using: 'composite'
+  steps:
+    - name: Get ephy_testing_data current head hash
+      id: ephys
+      shell: bash
+      run: |
+        HASH=$(git ls-remote https://gin.g-node.org/NeuralEnsemble/ephy_testing_data.git HEAD | cut -f1)
+        echo "HASH_EPHY_DATASET=$HASH" >> $GITHUB_OUTPUT
+
+    - name: Cache ephys dataset
+      uses: actions/cache@v4
+      id: cache-ephys-datasets
+      with:
+        path: ./ephy_testing_data
+        key: ephys-datasets-${{ inputs.os }}-${{ steps.ephys.outputs.HASH_EPHY_DATASET }}
+
+    - name: Get ophys_testing_data current head hash
+      id: ophys
+      shell: bash
+      run: |
+        HASH=$(git ls-remote https://gin.g-node.org/CatalystNeuro/ophys_testing_data.git HEAD | cut -f1)
+        echo "HASH_OPHYS_DATASET=$HASH" >> $GITHUB_OUTPUT
+
+    - name: Cache ophys dataset
+      uses: actions/cache@v4
+      id: cache-ophys-datasets
+      with:
+        path: ./ophys_testing_data
+        key: ophys-datasets-${{ inputs.os }}-${{ steps.ophys.outputs.HASH_OPHYS_DATASET }}
+
+    - name: Get behavior_testing_data current head hash
+      id: behavior
+      shell: bash
+      run: |
+        HASH=$(git ls-remote https://gin.g-node.org/CatalystNeuro/behavior_testing_data.git HEAD | cut -f1)
+        echo "HASH_BEHAVIOR_DATASET=$HASH" >> $GITHUB_OUTPUT
+
+    - name: Cache behavior dataset
+      uses: actions/cache@v4
+      id: cache-behavior-datasets
+      with:
+        path: ./behavior_testing_data
+        key: behavior-datasets-${{ inputs.os }}-${{ steps.behavior.outputs.HASH_BEHAVIOR_DATASET }}
+
+    - name: Determine if downloads are required
+      id: download-check
+      shell: bash  # Added shell property
+      run: |
+        if [[ "${{ steps.cache-ephys-datasets.outputs.cache-hit }}" != 'true' || \
+              "${{ steps.cache-ophys-datasets.outputs.cache-hit }}" != 'true' || \
+              "${{ steps.cache-behavior-datasets.outputs.cache-hit }}" != 'true' ]]; then
+          echo "DOWNLOAD_REQUIRED=true" >> $GITHUB_OUTPUT
+        else
+          echo "DOWNLOAD_REQUIRED=false" >> $GITHUB_OUTPUT
+        fi
+
+    - if: ${{ steps.download-check.outputs.DOWNLOAD_REQUIRED == 'true' }}
+      name: Install and configure AWS CLI
+      shell: bash
+      run: |
+        pip install awscli
+        aws configure set aws_access_key_id "${{ inputs.aws-access-key-id }}"
+        aws configure set aws_secret_access_key "${{ inputs.aws-secret-access-key }}"
+
+    - if: ${{ steps.cache-ephys-datasets.outputs.cache-hit != 'true' }}
+      name: Download ephys dataset from S3
+      shell: bash
+      run: |
+        aws s3 cp --recursive "${{ inputs.s3-gin-bucket }}/ephy_testing_data" ./ephy_testing_data
+
+    - if: ${{ steps.cache-ophys-datasets.outputs.cache-hit != 'true' }}
+      name: Download ophys dataset from S3
+      shell: bash
+      run: |
+        aws s3 cp --recursive "${{ inputs.s3-gin-bucket }}/ophys_testing_data" ./ophys_testing_data
+
+    - if: ${{ steps.cache-behavior-datasets.outputs.cache-hit != 'true' }}
+      name: Download behavior dataset from S3
+      shell: bash
+      run: |
+        aws s3 cp --recursive "${{ inputs.s3-gin-bucket }}/behavior_testing_data" ./behavior_testing_data
diff --git a/.github/workflows/dev-testing.yml b/.github/workflows/dev-testing.yml
index acd7a3d74..31b5329a8 100644
--- a/.github/workflows/dev-testing.yml
+++ b/.github/workflows/dev-testing.yml
@@ -72,52 +72,13 @@ jobs:
         run: |
           pip list
 
-      - name: Get ephy_testing_data current head hash
-        id: ephys
-        run: echo "::set-output name=HASH_EPHY_DATASET::$(git ls-remote https://gin.g-node.org/NeuralEnsemble/ephy_testing_data.git HEAD | cut -f1)"
-      - name: Cache ephys dataset - ${{ steps.ephys.outputs.HASH_EPHY_DATASET }}
-        uses: actions/cache@v4
-        id: cache-ephys-datasets
+      - name: Prepare data for tests
+        uses: ./.github/actions/load-data
         with:
-          path: ./ephy_testing_data
-          key: ephys-datasets-2024-08-30-ubuntu-latest-${{ steps.ephys.outputs.HASH_EPHY_DATASET }}
-      - name: Get ophys_testing_data current head hash
-        id: ophys
-        run: echo "::set-output name=HASH_OPHYS_DATASET::$(git ls-remote https://gin.g-node.org/CatalystNeuro/ophys_testing_data.git HEAD | cut -f1)"
-      - name: Cache ophys dataset - ${{ steps.ophys.outputs.HASH_OPHYS_DATASET }}
-        uses: actions/cache@v4
-        id: cache-ophys-datasets
-        with:
-          path: ./ophys_testing_data
-          key: ophys-datasets-2022-08-18-ubuntu-latest-${{ steps.ophys.outputs.HASH_OPHYS_DATASET }}
-      - name: Get behavior_testing_data current head hash
-        id: behavior
-        run: echo "::set-output name=HASH_BEHAVIOR_DATASET::$(git ls-remote https://gin.g-node.org/CatalystNeuro/behavior_testing_data.git HEAD | cut -f1)"
-      - name: Cache behavior dataset - ${{ steps.behavior.outputs.HASH_BEHAVIOR_DATASET }}
-        uses: actions/cache@v4
-        id: cache-behavior-datasets
-        with:
-          path: ./behavior_testing_data
-          key: behavior-datasets-2023-07-26-ubuntu-latest-${{ steps.behavior.outputs.HASH_behavior_DATASET }}
-
-
-
-      - if: steps.cache-ephys-datasets.outputs.cache-hit != 'true' || steps.cache-ophys-datasets.outputs.cache-hit != 'true' || steps.cache-behavior-datasets.outputs.cache-hit != 'true'
-        name: Install and configure AWS CLI
-        run: |
-          pip install awscli
-          aws configure set aws_access_key_id ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws configure set aws_secret_access_key ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-      - if: steps.cache-ephys-datasets.outputs.cache-hit != 'true'
-        name: Download ephys dataset from S3
-        run: aws s3 cp --recursive ${{ secrets.S3_GIN_BUCKET }}/ephy_testing_data ./ephy_testing_data
-      - if: steps.cache-ophys-datasets.outputs.cache-hit != 'true'
-        name: Download ophys dataset from S3
-        run: aws s3 cp --recursive ${{ secrets.S3_GIN_BUCKET }}/ophys_testing_data ./ophys_testing_data
-      - if: steps.cache-behavior-datasets.outputs.cache-hit != 'true'
-        name: Download behavior dataset from S3
-        run: aws s3 cp --recursive ${{ secrets.S3_GIN_BUCKET }}/behavior_testing_data ./behavior_testing_data
-
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          s3-gin-bucket: ${{ secrets.S3_GIN_BUCKET }}
+          os: ${{ matrix.os }}
 
 
       - name: Run full pytest
diff --git a/.github/workflows/doctests.yml b/.github/workflows/doctests.yml
index d816dbd02..e492eda0c 100644
--- a/.github/workflows/doctests.yml
+++ b/.github/workflows/doctests.yml
@@ -46,34 +46,13 @@ jobs:
         with:
           os: ${{ runner.os }}
 
-      - name: Get ephy_testing_data current head hash
-        id: ephys
-        run: echo "::set-output name=HASH_EPHY_DATASET::$(git ls-remote https://gin.g-node.org/NeuralEnsemble/ephy_testing_data.git HEAD | cut -f1)"
-      - name: Cache ephys dataset - ${{ steps.ephys.outputs.HASH_EPHY_DATASET }}
-        uses: actions/cache@v4
-        id: cache-ephys-datasets
+      - name: Prepare data for tests
+        uses: ./.github/actions/load-data
         with:
-          path: ./ephy_testing_data
-          key: ephys-datasets-2024-08-30-${{ matrix.os }}-${{ steps.ephys.outputs.HASH_EPHY_DATASET }}
-      - name: Get ophys_testing_data current head hash
-        id: ophys
-        run: echo "::set-output name=HASH_OPHYS_DATASET::$(git ls-remote https://gin.g-node.org/CatalystNeuro/ophys_testing_data.git HEAD | cut -f1)"
-      - name: Cache ophys dataset - ${{ steps.ophys.outputs.HASH_OPHYS_DATASET }}
-        uses: actions/cache@v4
-        id: cache-ophys-datasets
-        with:
-          path: ./ophys_testing_data
-          key: ophys-datasets-2022-08-18-${{ matrix.os }}-${{ steps.ophys.outputs.HASH_OPHYS_DATASET }}
-      - name: Get behavior_testing_data current head hash
-        id: behavior
-        run: echo "::set-output name=HASH_BEHAVIOR_DATASET::$(git ls-remote https://gin.g-node.org/CatalystNeuro/behavior_testing_data.git HEAD | cut -f1)"
-      - name: Cache behavior dataset - ${{ steps.behavior.outputs.HASH_BEHAVIOR_DATASET }}
-        uses: actions/cache@v4
-        id: cache-behavior-datasets
-        with:
-          path: ./behavior_testing_data
-          key: behavior-datasets-2023-07-26-${{ matrix.os }}-${{ steps.behavior.outputs.HASH_behavior_DATASET }}
-
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          s3-gin-bucket: ${{ secrets.S3_GIN_BUCKET }}
+          os: ${{ matrix.os }}
 
 
       - name: Run doctests
diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml
index 736da6030..06de82c4c 100644
--- a/.github/workflows/testing.yml
+++ b/.github/workflows/testing.yml
@@ -93,54 +93,16 @@ jobs:
       #- name: Run icephys tests  # There are no icephys specific tests without data
       #  run: pytest tests/test_icephys -rsx -n auto --dist loadscope
 
-      - name: Install full requirements
-        run: pip install .[full]
-
-      - name: Get ephy_testing_data current head hash
-        id: ephys
-        run: echo "::set-output name=HASH_EPHY_DATASET::$(git ls-remote https://gin.g-node.org/NeuralEnsemble/ephy_testing_data.git HEAD | cut -f1)"
-      - name: Cache ephys dataset - ${{ steps.ephys.outputs.HASH_EPHY_DATASET }}
-        uses: actions/cache@v4
-        id: cache-ephys-datasets
-        with:
-          path: ./ephy_testing_data
-          key: ephys-datasets-2024-08-30-${{ matrix.os }}-${{ steps.ephys.outputs.HASH_EPHY_DATASET }}
-      - name: Get ophys_testing_data current head hash
-        id: ophys
-        run: echo "::set-output name=HASH_OPHYS_DATASET::$(git ls-remote https://gin.g-node.org/CatalystNeuro/ophys_testing_data.git HEAD | cut -f1)"
-      - name: Cache ophys dataset - ${{ steps.ophys.outputs.HASH_OPHYS_DATASET }}
-        uses: actions/cache@v4
-        id: cache-ophys-datasets
-        with:
-          path: ./ophys_testing_data
-          key: ophys-datasets-2022-08-18-${{ matrix.os }}-${{ steps.ophys.outputs.HASH_OPHYS_DATASET }}
-      - name: Get behavior_testing_data current head hash
-        id: behavior
-        run: echo "::set-output name=HASH_BEHAVIOR_DATASET::$(git ls-remote https://gin.g-node.org/CatalystNeuro/behavior_testing_data.git HEAD | cut -f1)"
-      - name: Cache behavior dataset - ${{ steps.behavior.outputs.HASH_BEHAVIOR_DATASET }}
-        uses: actions/cache@v4
-        id: cache-behavior-datasets
+      - name: Prepare data for tests
+        uses: ./.github/actions/load-data
         with:
-          path: ./behavior_testing_data
-          key: behavior-datasets-2023-07-26-${{ matrix.os }}-${{ steps.behavior.outputs.HASH_behavior_DATASET }}
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          s3-gin-bucket: ${{ secrets.S3_GIN_BUCKET }}
+          os: ${{ matrix.os }}
 
-
-
-      - if: steps.cache-ephys-datasets.outputs.cache-hit != 'true' || steps.cache-ophys-datasets.outputs.cache-hit != 'true' || steps.cache-behavior-datasets.outputs.cache-hit != 'true'
-        name: Install and configure AWS CLI
-        run: |
-          pip install awscli
-          aws configure set aws_access_key_id ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws configure set aws_secret_access_key ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-      - if: steps.cache-ephys-datasets.outputs.cache-hit != 'true'
-        name: Download ephys dataset from S3
-        run: aws s3 cp --recursive ${{ secrets.S3_GIN_BUCKET }}/ephy_testing_data ./ephy_testing_data
-      - if: steps.cache-ophys-datasets.outputs.cache-hit != 'true'
-        name: Download ophys dataset from S3
-        run: aws s3 cp --recursive ${{ secrets.S3_GIN_BUCKET }}/ophys_testing_data ./ophys_testing_data
-      - if: steps.cache-behavior-datasets.outputs.cache-hit != 'true'
-        name: Download behavior dataset from S3
-        run: aws s3 cp --recursive ${{ secrets.S3_GIN_BUCKET }}/behavior_testing_data ./behavior_testing_data
+      - name: Install full requirements
+        run: pip install .[full]
 
       - name: Run full pytest with coverage
         run: pytest -vv -rsx -n auto --dist loadscope --cov=neuroconv --cov-report xml:./codecov.xml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 751d7cc55..9c7bc13be 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,7 +14,7 @@
 * Test that zarr backend_configuration works in gin data tests [PR #1094](https://github.com/catalystneuro/neuroconv/pull/1094)
 * Consolidated weekly workflows into one workflow and added email notifications [PR #1088](https://github.com/catalystneuro/neuroconv/pull/1088)
 * Avoid running link test when the PR is on draft [PR #1093](https://github.com/catalystneuro/neuroconv/pull/1093)
-
+* Centralize gin data preparation in a github action [PR #1095](https://github.com/catalystneuro/neuroconv/pull/1095)
 
 # v0.6.4 (September 17, 2024)
 