Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update pax and t5x build workflows and source to allow TE to be configurable #364

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/container/Dockerfile.pax.amd64
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ ADD install-te.sh /usr/local/bin
ENV NVTE_FRAMEWORK=jax
ARG REPO_PAXML=https://github.com/google/paxml.git
ARG REPO_PRAXIS=https://github.com/google/praxis.git
ARG REPO_TE=https://github.com/NVIDIA/TransformerEngine.git
ARG REF_PAXML=main
ARG REF_PRAXIS=main
ARG REF_TE=main
RUN <<"EOF" bash -ex
install-pax.sh --defer --from_paxml ${REPO_PAXML} --from_praxis ${REPO_PRAXIS} --ref_paxml ${REF_PAXML} --ref_praxis ${REF_PRAXIS}
install-flax.sh --defer
install-te.sh --defer
install-te.sh --defer --from ${REPO_TE} --ref ${REF_TE}

if [[ -f /opt/requirements-defer.txt ]]; then
# SKIP_HEAD_INSTALLS avoids having to install jax from Github source so that
Expand Down
3 changes: 3 additions & 0 deletions .github/container/Dockerfile.pax.arm64
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ ADD test-pax.sh /usr/local/bin
# TODO: Uncomment these build-args and use them when installing pax and TE
# ARG REPO_PAXML=https://github.com/google/paxml.git
# ARG REPO_PRAXIS=https://github.com/google/praxis.git
# ARG REPO_TE=https://github.com/NVIDIA/TransformerEngine.git
# ARG REF_PAXML=main
# ARG REF_PRAXIS=main
# ARG REF_TE=main
# install-pax.sh --defer --from_paxml ${REPO_PAXML} --from_praxis ${REPO_PRAXIS} --ref_paxml ${REF_PAXML} --ref_praxis ${REF_PRAXIS}
# install-te.sh --defer --from ${REPO_TE} --ref ${REF_TE}
12 changes: 12 additions & 0 deletions .github/workflows/_build_pax.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ on:
description: URL of Praxis repository to check out
required: false
default: "https://github.com/google/praxis.git"
REPO_TE:
type: string
description: URL of TE repository to check out
required: false
default: "https://github.com/NVIDIA/TransformerEngine.git"
REF_PAXML:
type: string
description: Git commit, tag, or branch for Paxml
Expand All @@ -33,6 +38,11 @@ on:
description: Git commit, tag, or branch for Praxis
required: false
default: main
REF_TE:
type: string
description: Git commit, tag, or branch for TE
required: false
default: main
outputs:
DOCKER_TAGS:
description: "Tags of the image built"
Expand Down Expand Up @@ -101,8 +111,10 @@ jobs:
BUILD_DATE=${{ inputs.BUILD_DATE }}
REPO_PAXML=${{ inputs.REPO_PAXML }}
REPO_PRAXIS=${{ inputs.REPO_PRAXIS }}
REPO_TE=${{ inputs.REPO_TE }}
REF_PAXML=${{ inputs.REF_PAXML }}
REF_PRAXIS=${{ inputs.REF_PRAXIS }}
REF_TE=${{ inputs.REF_TE }}

# Temporary workaround until the following issues are solved:
# https://github.com/orgs/community/discussions/17245
Expand Down
36 changes: 33 additions & 3 deletions .github/workflows/nightly-pax-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,26 @@ on:
branches: [main]
workflow_dispatch:
inputs:
REPO_TE:
type: string
description: URL of TE repository to check out
required: false
default: "https://github.com/NVIDIA/TransformerEngine.git"
REF_TE:
type: string
description: Git commit, tag, or branch for TE
required: false
default: main
PUBLISH:
type: boolean
description: Publish nightly images and update the 'latest' tag?
default: false
required: false

env:
DEFAULT_REPO_TE: https://github.com/NVIDIA/TransformerEngine.git
DEFAULT_REF_TE: main

permissions:
contents: read # to fetch code
actions: write # to cancel previous workflows
Expand All @@ -25,21 +39,37 @@ jobs:
if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch'
runs-on: ubuntu-22.04
outputs:
BUILD_DATE: ${{ steps.date.outputs.BUILD_DATE }}
BUILD_DATE: ${{ steps.meta.outputs.BUILD_DATE }}
REPO_TE: ${{ steps.meta.outputs.REPO_TE }}
REF_TE: ${{ steps.meta.outputs.REF_TE }}
steps:
- name: Set build date
id: date
# Computes the build date and resolves which Transformer Engine (TE)
# repository/ref to build, then publishes all three as step outputs.
- name: Set build metadata
  id: meta
  shell: bash -x -e {0}
  env:
    # Hand the workflow inputs to the script through the environment instead
    # of expanding them inside the script text, so their contents are never
    # parsed as shell code. They may be empty (e.g. on workflow_run triggers).
    INPUT_REPO_TE: ${{ inputs.REPO_TE }}
    INPUT_REF_TE: ${{ inputs.REF_TE }}
  run: |
    BUILD_DATE=$(TZ='US/Los_Angeles' date '+%Y-%m-%d')
    echo "BUILD_DATE=${BUILD_DATE}" >> "$GITHUB_OUTPUT"
    # Fall back to the workflow-level DEFAULT_* env values when an input is
    # empty or unset.
    REPO_TE="${INPUT_REPO_TE:-${DEFAULT_REPO_TE}}"
    echo "REPO_TE=${REPO_TE}" >> "$GITHUB_OUTPUT"
    REF_TE="${INPUT_REF_TE:-${DEFAULT_REF_TE}}"
    echo "REF_TE=${REF_TE}" >> "$GITHUB_OUTPUT"

# Invokes the reusable _build_pax.yaml workflow with the values produced by
# the metadata job.
build:
# Run only after a successful triggering workflow_run, or on a manual
# workflow_dispatch.
if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch'
needs: metadata
uses: ./.github/workflows/_build_pax.yaml
with:
BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
# TE repository URL and git ref as resolved by the metadata job.
REPO_TE: ${{ needs.metadata.outputs.REPO_TE }}
REF_TE: ${{ needs.metadata.outputs.REF_TE }}
secrets: inherit

publish:
Expand Down
36 changes: 33 additions & 3 deletions .github/workflows/nightly-t5x-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,26 @@ on:
branches: [main]
workflow_dispatch:
inputs:
REPO_TE:
type: string
description: URL of TE repository to check out
required: false
default: "https://github.com/NVIDIA/TransformerEngine.git"
REF_TE:
type: string
description: Git commit, tag, or branch for TE
required: false
default: main
PUBLISH:
type: boolean
description: Publish dated images and update the 'latest' tag?
default: false
required: false

env:
DEFAULT_REPO_TE: https://github.com/NVIDIA/TransformerEngine.git
DEFAULT_REF_TE: main

permissions:
contents: read # to fetch code
actions: write # to cancel previous workflows
Expand All @@ -25,21 +39,37 @@ jobs:
if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch'
runs-on: ubuntu-22.04
outputs:
BUILD_DATE: ${{ steps.date.outputs.BUILD_DATE }}
BUILD_DATE: ${{ steps.meta.outputs.BUILD_DATE }}
REPO_TE: ${{ steps.meta.outputs.REPO_TE }}
REF_TE: ${{ steps.meta.outputs.REF_TE }}
steps:
- name: Set build date
id: date
# Computes the build date and resolves which Transformer Engine (TE)
# repository/ref to build, then publishes all three as step outputs.
- name: Set build metadata
  id: meta
  shell: bash -x -e {0}
  env:
    # Hand the workflow inputs to the script through the environment instead
    # of expanding them inside the script text, so their contents are never
    # parsed as shell code. They may be empty (e.g. on workflow_run triggers).
    INPUT_REPO_TE: ${{ inputs.REPO_TE }}
    INPUT_REF_TE: ${{ inputs.REF_TE }}
  run: |
    BUILD_DATE=$(TZ='US/Los_Angeles' date '+%Y-%m-%d')
    echo "BUILD_DATE=${BUILD_DATE}" >> "$GITHUB_OUTPUT"
    # Fall back to the workflow-level DEFAULT_* env values when an input is
    # empty or unset.
    REPO_TE="${INPUT_REPO_TE:-${DEFAULT_REPO_TE}}"
    echo "REPO_TE=${REPO_TE}" >> "$GITHUB_OUTPUT"
    REF_TE="${INPUT_REF_TE:-${DEFAULT_REF_TE}}"
    echo "REF_TE=${REF_TE}" >> "$GITHUB_OUTPUT"

# Invokes the reusable _build_t5x.yaml workflow with the values produced by
# the metadata job.
build:
# Run only after a successful triggering workflow_run, or on a manual
# workflow_dispatch.
if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch'
needs: metadata
uses: ./.github/workflows/_build_t5x.yaml
with:
BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
# TE repository URL and git ref as resolved by the metadata job.
REPO_TE: ${{ needs.metadata.outputs.REPO_TE }}
REF_TE: ${{ needs.metadata.outputs.REF_TE }}
secrets: inherit

publish:
Expand Down
Loading