# Make metadata easier to use. #109

# GitHub Actions workflow that builds a Docker image for the
# cfa-epinow2-pipeline-docker project. In-container tests can be added here.
name: Create Docker Image and Azure Pool

on:
  pull_request:
    branches:
      - main
  workflow_dispatch:

env:
  # Joined as "<REGISTRY>/<IMAGE_NAME>", i.e.
  # cfaprdbatchcr.azurecr.io/cfa-epinow2-pipeline
  REGISTRY: "cfaprdbatchcr.azurecr.io"
  IMAGE_NAME: "cfa-epinow2-pipeline"

jobs:
  ###########################################################################
  # Job 1: builds and pushes the dependencies image (skipped when the cache
  # key is already present) and exposes the image tag and the commit subject
  # line as job outputs.
  ###########################################################################
  build-dependencies-image:
    name: Build dependencies image
    # VM based runner serving CFA's cdcgov repos (as opposed to cdcent)
    runs-on: cfa-cdcgov
    outputs:
      tag: ${{ steps.image-tag.outputs.tag }}
      commit-msg: ${{ steps.commit-message.outputs.message }}
    steps:
      #######################################################################
      # Retrieving the commit message
      # On a pull request the default checkout may be a merge commit, which
      # will not have the commit message we need. So we first check out the
      # PR's head sha, read the subject line of that commit, and only then
      # do the regular checkout.
      # NOTE(review): on workflow_dispatch the `ref` expression is empty and
      # checkout is expected to fall back to the triggering ref -- confirm.
      #######################################################################
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Getting the commit message
        id: commit-message
        run: |
          echo "message=$(git log -1 --pretty=%s HEAD)" >> "$GITHUB_OUTPUT"

      - name: Checking out the latest (may be merge if PR)
        uses: actions/checkout@v4

      # From: https://stackoverflow.com/a/58035262/2097171
      - name: Extract branch name
        id: branch-name
        shell: bash
        run: |
          echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> "$GITHUB_OUTPUT"

      #######################################################################
      # Getting the tag
      # The tag will be used for both the docker image and the batch pool
      #######################################################################
      - name: Figure out tag (either latest if it is main or the branch name)
        id: image-tag
        env:
          # The branch name is user-controlled, so it is handed to the
          # script through the environment instead of being interpolated
          # into the shell source (avoids script injection).
          BRANCH: ${{ steps.branch-name.outputs.branch }}
        run: |
          if [ "$BRANCH" = "main" ]; then
            echo "tag=latest" >> "$GITHUB_OUTPUT"
          else
            # '/' is not valid in a Docker tag, so a branch such as
            # feature/foo is tagged feature-foo.
            echo "tag=$(printf '%s' "$BRANCH" | tr '/' '-')" >> "$GITHUB_OUTPUT"
          fi

      # The cache entry is only used as a marker: the key combines the
      # hashes of DESCRIPTION and Dockerfile-dependencies with the tag, and
      # a hit makes the login and build steps below skip themselves.
      - name: Check cache for base image
        id: cache
        uses: actions/cache@v4
        with:
          key: docker-dependencies-${{ runner.os }}-${{ hashFiles('./DESCRIPTION', './Dockerfile-dependencies') }}-${{ steps.image-tag.outputs.tag }}
          lookup-only: true
          path: ./DESCRIPTION

      - name: Login to the Container Registry
        if: steps.cache.outputs.cache-hit != 'true'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: "cfaprdbatchcr"
          password: ${{ secrets.CFAPRDBATCHCR_REGISTRY_PASSWORD }}

      - name: Build and push
        if: steps.cache.outputs.cache-hit != 'true'
        uses: docker/build-push-action@v6
        with:
          push: true
          tags: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-dependencies:${{ steps.image-tag.outputs.tag }}
          file: ./Dockerfile-dependencies

  ###########################################################################
  # Job 2: builds and pushes the pipeline image as
  # <REGISTRY>/<IMAGE_NAME>:test-<tag>, passing the tag to the Dockerfile
  # through the TAG build argument.
  ###########################################################################
  build-pipeline-image:
    name: Build pipeline image
    needs: build-dependencies-image
    runs-on: cfa-cdcgov
    # Re-exported so that batch-pool can read both values from this job.
    outputs:
      tag: ${{ needs.build-dependencies-image.outputs.tag }}
      commit-msg: ${{ needs.build-dependencies-image.outputs.commit-msg }}
    steps:
      # NOTE(review): there is no checkout step in this job; the build step
      # presumably relies on docker/build-push-action's default Git
      # context -- confirm.
      - name: Login to the Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: "cfaprdbatchcr"
          password: ${{ secrets.CFAPRDBATCHCR_REGISTRY_PASSWORD }}

      - name: Build and push model pipeline image for Azure batch
        id: build_and_push_model_image
        uses: docker/build-push-action@v6
        with:
          push: true # This can be toggled manually for tweaking.
          tags: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:test-${{ needs.build-dependencies-image.outputs.tag }}
          file: ./Dockerfile
          build-args: |
            TAG=${{ needs.build-dependencies-image.outputs.tag }}

  ###########################################################################
  # Job 3: creates the Azure Batch pool for this tag if it does not exist
  # yet, and deletes it again when the commit message asks for it.
  ###########################################################################
  batch-pool:
    name: Create Batch Pool and Submit Jobs
    runs-on: cfa-cdcgov
    needs: build-pipeline-image
    container: python:3.12
    permissions:
      contents: read
      packages: write
    env:
      TAG: ${{ needs.build-pipeline-image.outputs.tag }}
      COMMIT_MSG: ${{ needs.build-pipeline-image.outputs.commit-msg }}
      # Azure Batch pool parameters. Everything here is exported to every
      # step of this job as an environment variable.
      # NOTE(review): BATCH_ENDPOINT, VM_IMAGE_TAG and NODE_AGENT_SKU_ID are
      # not referenced by any step below -- presumably read by
      # azure/pool.py; confirm.
      POOL_ID: "cfa-epinow2-${{ needs.build-pipeline-image.outputs.tag }}"
      BATCH_ACCOUNT: "cfaprdba"
      BATCH_ENDPOINT: "https://cfaprdba.eastus.batch.azure.com/"
      VM_IMAGE_TAG: "canonical:0001-com-ubuntu-server-focal:20_04-lts"
      NODE_AGENT_SKU_ID: "batch.node.ubuntu 20.04"
      VM_SIZE: "standard_a4m_v2"
      RESOURCE_GROUP: ${{ secrets.PRD_RESOURCE_GROUP }}
    steps:
      - name: Checkout Repo
        id: checkout_repo
        uses: actions/checkout@v4

      # This step is only needed during the action to write the
      # config file. Users can have a config file stored in their VAP
      # sessions. In the future, we will have the config.toml file
      # distributed with the repo (encrypted).
      - name: Writing out config file
        env:
          POOL_CONFIG_TOML: ${{ secrets.POOL_CONFIG_TOML }}
        run: |
          config="pool-config-${{ github.sha }}.toml"
          # Written verbatim from the environment: no shell expansion is
          # applied to the secret's contents.
          printf '%s\n' "$POOL_CONFIG_TOML" > "$config"
          # Replacing placeholders in the config file
          sed -i "s|{{ IMAGE_NAME }}|${REGISTRY}/${IMAGE_NAME}:test-${TAG}|g" "$config"
          sed -i "s|{{ VM_SIZE }}|${VM_SIZE}|g" "$config"
          sed -i "s|{{ POOL_ID }}|${POOL_ID}|g" "$config"

      - name: Ensuring the Azure CLI is installed
        run: |
          apt-get update && apt-get install -y --no-install-recommends azure-cli

      - name: Login to Azure with NNH Service Principal
        id: azure_login_2
        uses: azure/login@v2
        with:
          # managed by EDAV. Contact Amit Mantri or Jon Kislin if you have issues.
          creds: ${{ secrets.EDAV_CFA_PREDICT_NNHT_SP }}

      #######################################################################
      # Checking if the pool exists
      # This is done via az batch pool list. If there is no pool matching
      # the pool id (which is a function of the tag, i.e., branch name),
      # then pool-exists will be ''.
      #######################################################################
      - name: Check if pool exists
        id: check_pool_id
        run: |
          az batch account login \
            --resource-group "$RESOURCE_GROUP" \
            --name "$BATCH_ACCOUNT"
          az batch pool list \
            --output tsv \
            --filter "(id eq '${POOL_ID}')" \
            --query "[].[id, allocationState, creationTime]" > \
            "pool-list-${{ github.sha }}"
          echo "pool-exists=$(cat "pool-list-${{ github.sha }}")" >> \
            "$GITHUB_OUTPUT"

      # Runs only when the check above found no pool with this id.
      - name: Create cfa-epinow2-pipeline Pool
        id: create_batch_pool
        if: ${{ steps.check_pool_id.outputs.pool-exists == '' }}
        run: |
          # Running the python script azure/pool.py passing the config file
          # as an argument
          pip install -r azure/requirements.txt
          python3 azure/pool.py \
            "pool-config-${{ github.sha }}.toml" \
            batch-autoscale-formula.txt
          # Only reached when the commands above succeeded (the default
          # shell runs with -e). The "Delete the pool" condition reads this.
          echo "pool-created=yes" >> "$GITHUB_OUTPUT"

      #######################################################################
      # Deleting the pool only if (a) it exists (already present, or just
      # created above) and (b) the commit message contains '[delete pool]'.
      #######################################################################
      - name: Delete the pool
        if: ${{ (steps.check_pool_id.outputs.pool-exists != '' || steps.create_batch_pool.outputs.pool-created == 'yes') && contains(env.COMMIT_MSG, '[delete pool]') }}
        run: |
          az batch account login \
            --resource-group "$RESOURCE_GROUP" \
            --name "$BATCH_ACCOUNT"
          az batch pool delete \
            --pool-id "$POOL_ID" \
            --yes