diff --git a/.github/workflows/_build_pax.yaml b/.github/workflows/_build_pax.yaml index 62bc175ad..de2b3dafd 100644 --- a/.github/workflows/_build_pax.yaml +++ b/.github/workflows/_build_pax.yaml @@ -163,7 +163,7 @@ jobs: # bring in utility functions source .github/workflows/scripts/to_json.sh - badge_label='PAX ${{ inputs.ARCHITECTURE }} build' + badge_label='Upstream PAX ${{ inputs.ARCHITECTURE }} build' tags="${{ steps.final-metadata.outputs.tags }}" digest="${{ steps.final-build.outputs.digest }}" outcome="${{ steps.final-build.outcome }}" @@ -171,11 +171,11 @@ jobs: if [[ ${outcome} == "success" ]]; then badge_message="pass" badge_color=brightgreen - summary="PAX build on ${{ inputs.ARCHITECTURE }}: $badge_message" + summary="Upstream PAX build on ${{ inputs.ARCHITECTURE }}: $badge_message" else badge_message="fail" badge_color=red - summary="PAX build on ${{ inputs.ARCHITECTURE }}: $badge_message" + summary="Upstream PAX build on ${{ inputs.ARCHITECTURE }}: $badge_message" fi to_json \ diff --git a/.github/workflows/_build_rosetta.yaml b/.github/workflows/_build_rosetta.yaml index 02dcc0951..12bd9a314 100644 --- a/.github/workflows/_build_rosetta.yaml +++ b/.github/workflows/_build_rosetta.yaml @@ -21,15 +21,20 @@ on: description: 'Build date in YYYY-MM-DD format' required: false default: 'NOT SPECIFIED' + ARTIFACT_NAME: + type: string + description: 'Name of the artifact zip file' + required: false + default: 'artifact-rosetta-build' BADGE_FILENAME: type: string - description: 'Name of the endpoint JSON file for shields.io badge' + description: 'Name of the endpoint JSON file for shields.io badge (w/o .json || arch || library)' required: false - default: 'badge-rosetta-build' + default: 'badge-rosetta-build' outputs: DOCKER_TAG_MEALKIT: - description: 'Tags of the mealkit image build' - value: $ {{ jobs.build-rosetta.outputs.DOCKER_TAG_MEALKIT }} + description: "Tags of the 'mealkit' image built" + value: ${{ jobs.build-rosetta.outputs.DOCKER_TAG_MEALKIT }} DOCKER_TAG_FINAL: description: "Tags of the complete image built" value: ${{ jobs.build-rosetta.outputs.DOCKER_TAG_FINAL }} @@ -48,7 +53,8 @@ jobs: build-rosetta: runs-on: [self-hosted, "${{ inputs.ARCHITECTURE }}", small] env: - BADGE_FILENAME_FULL: ${{ inputs.BADGE_FILENAME}}-${{ inputs.ARCHITECTURE}}.json + BADGE_FILENAME_FULL: ${{ inputs.BADGE_FILENAME }}-${{ inputs.BASE_LIBRARY }}-${{ inputs.ARCHITECTURE }}.json + ARTIFACT_NAME_FULL: ${{ inputs.ARTIFACT_NAME }}-${{ inputs.BASE_LIBRARY }}-${{ inputs.ARCHITECTURE }} outputs: DOCKER_TAG_MEALKIT: ${{ steps.mealkit-metadata.outputs.tags }} DOCKER_TAG_FINAL: ${{ steps.final-metadata.outputs.tags }} @@ -124,4 +130,55 @@ jobs: labels: ${{ steps.final-metadata.outputs.labels }} target: final build-args: | - BASE_IMAGE=${{ steps.defaults.outputs.BASE_IMAGE }} \ No newline at end of file + BASE_IMAGE=${{ steps.defaults.outputs.BASE_IMAGE }} + + - name: Extract patches + run: rosetta/scripts/extract-patches.sh ${{ steps.final-metadata.outputs.tags }} + + - name: Archive generated patches + uses: actions/upload-artifact@v3 + with: + name: patches-${{ inputs.BASE_LIBRARY }}-${{ github.run_id }}-${{ inputs.BUILD_DATE }}-${{ inputs.ARCHITECTURE }} + path: rosetta/patches + + - name: Generate sitrep + if: success() || failure() + shell: bash -x -e {0} + run: | + # bring in utility functions + source .github/workflows/scripts/to_json.sh + + badge_label='${{ inputs.BASE_LIBRARY }} ${{ inputs.ARCHITECTURE }} build' + tags="${{ steps.final-metadata.outputs.tags }}" + digest="${{ steps.final-build.outputs.digest }}" + outcome="${{ steps.final-build.outcome }}" + + if [[ ${outcome} == "success" ]]; then + badge_message="pass" + badge_color=brightgreen + summary="${{ inputs.BASE_LIBRARY }} build on ${{ inputs.ARCHITECTURE }}: $badge_message" + else + badge_message="fail" + badge_color=red + summary="${{ inputs.BASE_LIBRARY }} build on ${{ inputs.ARCHITECTURE }}: $badge_message" + fi + + to_json \ + summary \ + badge_label tags digest outcome \ + > sitrep.json + + schemaVersion=1 \ + label="${badge_label}" \ + message="${badge_message}" \ + color="${badge_color}" \ + to_json schemaVersion label message color \ + > ${{ env.BADGE_FILENAME_FULL }} + + - name: Upload sitrep and badge + uses: actions/upload-artifact@v3 + with: + name: ${{ env.ARTIFACT_NAME_FULL }} + path: | + sitrep.json + ${{ env.BADGE_FILENAME_FULL }} diff --git a/.github/workflows/_build_t5x.yaml b/.github/workflows/_build_t5x.yaml index 6efcde8f9..890a47141 100644 --- a/.github/workflows/_build_t5x.yaml +++ b/.github/workflows/_build_t5x.yaml @@ -163,7 +163,7 @@ jobs: # bring in utility functions source .github/workflows/scripts/to_json.sh - badge_label='T5X ${{ inputs.ARCHITECTURE }} build' + badge_label='Upstream T5X ${{ inputs.ARCHITECTURE }} build' tags="${{ steps.final-metadata.outputs.tags }}" digest="${{ steps.final-build.outputs.digest }}" outcome="${{ steps.final-build.outcome }}" @@ -171,11 +171,11 @@ jobs: if [[ ${outcome} == "success" ]]; then badge_message="pass" badge_color=brightgreen - summary="T5X build on ${{ inputs.ARCHITECTURE }}: $badge_message" + summary="Upstream T5X build on ${{ inputs.ARCHITECTURE }}: $badge_message" else badge_message="fail" badge_color=red - summary="T5X build on ${{ inputs.ARCHITECTURE }}: $badge_message" + summary="Upstream T5X build on ${{ inputs.ARCHITECTURE }}: $badge_message" fi to_json \ diff --git a/.github/workflows/nightly-rosetta-pax-build.yaml b/.github/workflows/nightly-rosetta-pax-build.yaml index 0acc36d3d..74c689924 100644 --- a/.github/workflows/nightly-rosetta-pax-build.yaml +++ b/.github/workflows/nightly-rosetta-pax-build.yaml @@ -10,7 +10,7 @@ on: inputs: BASE_IMAGE: type: string - description: 'PAX image built by NVIDIA/JAX-Toolbox' + description: 'Upstream Pax mealkit image without $arch-mealkit suffix, e.g., (ghcr.io/nvidia/jax-toolbox-internal:6857094059-upstream-pax). Leaving empty implies ghcr.io/nvidia/upstream-pax:mealkit' default: '' required: false PUBLISH: @@ -49,15 +49,6 @@ jobs: if: steps.if-upstream-failed.outputs.UPSTREAM_FAILED == 'true' uses: styfle/cancel-workflow-action@0.12.0 - - name: Determine if the resulting container should be 'published' - id: if-publish - shell: bash -x -e {0} - run: - # A container should be published if: - # 1) the workflow is triggered by workflow_dispatch and the PUBLISH input is true, or - # 2) the workflow is triggered by workflow_run (i.e., a nightly build) - echo "PUBLISH=${{ github.event_name == 'workflow_run' || (github.event_name == 'workflow_dispatch' && inputs.PUBLISH) }}" >> $GITHUB_OUTPUT - - name: Set build date id: date shell: bash -x -e {0} @@ -77,6 +68,10 @@ jobs: BASE_IMAGE_ARM64=${{ inputs.BASE_IMAGE }}-arm64-mealkit fi echo "BASE_LIBRARY=${{ env.BASE_LIBRARY }}" >> $GITHUB_OUTPUT + # A container should be published if: + # 1) the workflow is triggered by workflow_dispatch and the PUBLISH input is true, or + # 2) the workflow is triggered by workflow_run (i.e., a nightly build) + echo "PUBLISH=${{ github.event_name == 'workflow_run' || (github.event_name == 'workflow_dispatch' && inputs.PUBLISH) }}" >> $GITHUB_OUTPUT echo "BASE_IMAGE_AMD64=${BASE_IMAGE_AMD64}" >> $GITHUB_OUTPUT echo "BASE_IMAGE_ARM64=${BASE_IMAGE_ARM64}" >> $GITHUB_OUTPUT @@ -100,6 +95,29 @@ jobs: BASE_IMAGE: ${{ needs.metadata.outputs.BASE_IMAGE_ARM64 }} secrets: inherit + publish-build-badge: + needs: [metadata, amd64, arm64] + uses: ./.github/workflows/_publish_badge.yaml + if: always() + with: + ENDPOINT_FILENAME: 'rosetta-pax-build-status.json' + PUBLISH: ${{ needs.metadata.outputs.PUBLISH == 'true' }} + SCRIPT: | + if [[ ${{ needs.amd64.result }} == "success" && ${{ needs.arm64.result }} == "success" ]]; then + BADGE_COLOR=brightgreen + MSG=passing + STATUS=success + else + BADGE_COLOR=red + MSG=failing + STATUS=failure + fi + echo "LABEL='nightly'" >> $GITHUB_OUTPUT + echo "MESSAGE='${MSG}'" >> $GITHUB_OUTPUT + echo "COLOR='${BADGE_COLOR}'" >> $GITHUB_OUTPUT + echo "STATUS='${STATUS}'" >> ${GITHUB_OUTPUT} + secrets: inherit + publish-mealkit: needs: [metadata, amd64, arm64] if: needs.metadata.outputs.PUBLISH == 'true' @@ -113,8 +131,17 @@ jobs: type=raw,value=mealkit,priority=500 type=raw,value=mealkit-${{ needs.metadata.outputs.BUILD_DATE }},priority=500 + # TODO: Test ARM when runners available + test-amd64: + needs: amd64 + uses: ./.github/workflows/_test_pax_rosetta.yaml + with: + PAX_IMAGE: ${{ needs.amd64.outputs.DOCKER_TAG_FINAL }} + secrets: inherit + + # TODO: ARM Tests publish-final: - needs: [metadata, amd64, arm64] + needs: [metadata, amd64, arm64, test-amd64] if: needs.metadata.outputs.PUBLISH == 'true' uses: ./.github/workflows/_publish_container.yaml with: @@ -123,49 +150,45 @@ jobs: ${{ needs.arm64.outputs.DOCKER_TAG_FINAL }} TARGET_IMAGE: pax TARGET_TAGS: | - type=raw,value=latest,priority=1000 - type=raw,value=nightly-${{ needs.metadata.outputs.BUILD_DATE }},priority=900 + ${{ needs.test-amd64.outputs.TEST_STATUS == 'success' && 'type=raw,value=latest,priority=1000' || '' }} + type=raw,value=nightly-${{ needs.metadata.outputs.BUILD_DATE }},priority=900 - test-pax: - needs: [metadata, amd64, arm64] - uses: ./.github/workflows/_test_pax_rosetta.yaml - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch' - with: - PAX_IMAGE: ${{ needs.amd64.outputs.DOCKER_TAG_FINAL }} - secrets: inherit - - publish-test: - needs: [metadata, amd64, arm64, test-pax] + # TODO: ARM Tests + publish-test-badge: + needs: [metadata, publish-build-badge, test-amd64] uses: ./.github/workflows/_publish_badge.yaml - if: ( always() ) + if: always() secrets: inherit with: ENDPOINT_FILENAME: 'rosetta-pax-overall-test-status.json' - PUBLISH: ${{ github.event_name == 'workflow_run' || needs.metadata.outputs.PUBLISH == 'true' }} + PUBLISH: ${{ needs.metadata.outputs.PUBLISH == 'true' }} SCRIPT: | - PAX_STATUS=${{ needs.test-pax.outputs.TEST_STATUS }} + PAX_STATUS=${{ needs.test-amd64.outputs.TEST_STATUS }} echo "LABEL='Tests'" >> $GITHUB_OUTPUT - if [[ ${{ needs.amd64.result }} == "success" && ${{ needs.arm64.result }} == "success" ]]; then + STATUS=failure + if [[ ${{ needs.publish-build-badge.outputs.STATUS }} == "success" ]]; then if [[ $PAX_STATUS == "success" ]]; then COLOR=brightgreen MESSAGE="MGMN passed" + STATUS=success else COLOR=red MESSAGE="MGMN failed" fi else - MESSAGE="n/a" COLOR="red" + MESSAGE="n/a" fi - echo "MESSAGE='${MESSAGE}'" >> $GITHUB_OUTPUT echo "COLOR='${COLOR}'" >> $GITHUB_OUTPUT + echo "MESSAGE='${MESSAGE}'" >> $GITHUB_OUTPUT + echo "STATUS='${STATUS}'" >> ${GITHUB_OUTPUT} finalize: if: always() - needs: [metadata, amd64, arm64] + needs: [metadata, amd64, arm64, test-amd64] uses: ./.github/workflows/_finalize.yaml with: PUBLISH_BADGE: ${{ needs.metadata.outputs.PUBLISH == 'true' }} diff --git a/.github/workflows/nightly-rosetta-t5x-build-test.yaml b/.github/workflows/nightly-rosetta-t5x-build-test.yaml index 9c734b57c..e24ac2e57 100644 --- a/.github/workflows/nightly-rosetta-t5x-build-test.yaml +++ b/.github/workflows/nightly-rosetta-t5x-build-test.yaml @@ -10,7 +10,7 @@ on: inputs: BASE_IMAGE: type: string - description: 'T5x image built by NVIDIA/JAX-Toolbox' + description: 'Upstream T5x mealkit image without $arch-mealkit suffix, e.g., (ghcr.io/nvidia/jax-toolbox-internal:6857094059-upstream-t5x). Leaving empty implies ghcr.io/nvidia/upstream-t5x:mealkit' default: '' required: false PUBLISH: @@ -77,6 +77,10 @@ jobs: BASE_IMAGE_ARM64=${{ inputs.BASE_IMAGE }}-arm64-mealkit fi echo "BASE_LIBRARY=${{ env.BASE_LIBRARY }}" >> $GITHUB_OUTPUT + # A container should be published if: + # 1) the workflow is triggered by workflow_dispatch and the PUBLISH input is true, or + # 2) the workflow is triggered by workflow_run (i.e., a nightly build) + echo "PUBLISH=${{ github.event_name == 'workflow_run' || (github.event_name == 'workflow_dispatch' && inputs.PUBLISH) }}" >> $GITHUB_OUTPUT echo "BASE_IMAGE_AMD64=${BASE_IMAGE_AMD64}" >> $GITHUB_OUTPUT echo "BASE_IMAGE_ARM64=${BASE_IMAGE_ARM64}" >> $GITHUB_OUTPUT @@ -89,21 +93,47 @@ jobs: BASE_LIBRARY: ${{ needs.metadata.outputs.BASE_LIBRARY }} BASE_IMAGE: ${{ needs.metadata.outputs.BASE_IMAGE_AMD64 }} secrets: inherit - + + # TODO: Can't build ARM until https://github.com/NVIDIA/JAX-Toolbox/pull/252 is available arm64: needs: metadata runs-on: ubuntu-22.04 outputs: + DOCKER_TAG_FINAL: '' DOCKER_TAG_MEALKIT: '' steps: - name: Generate placeholder warning shell: bash -x -e {0} run: | echo "WARNING: arm64 build is not yet supported" - + + # TODO: ARM + publish-build-badge: + needs: [metadata, amd64, arm64] + uses: ./.github/workflows/_publish_badge.yaml + if: always() + secrets: inherit + with: + ENDPOINT_FILENAME: 'rosetta-t5x-build-status.json' + PUBLISH: ${{ needs.metadata.outputs.PUBLISH == 'true' }} + SCRIPT: | + if [[ ${{ needs.amd64.result }} == "success" && ${{ needs.arm64.result }} == "success" ]]; then + BADGE_COLOR=brightgreen + MSG=passing + STATUS=success + else + BADGE_COLOR=red + MSG=failing + STATUS=failure + fi + echo "LABEL='nightly'" >> $GITHUB_OUTPUT + echo "MESSAGE='${MSG}'" >> $GITHUB_OUTPUT + echo "COLOR='${BADGE_COLOR}'" >> $GITHUB_OUTPUT + echo "STATUS='${STATUS}'" >> ${GITHUB_OUTPUT} + publish-mealkit: needs: [metadata, amd64, arm64] - if: needs.metadata.output.PUBLISH == 'true' + if: needs.metadata.outputs.PUBLISH == 'true' uses: ./.github/workflows/_publish_container.yaml with: SOURCE_IMAGE: | @@ -112,73 +142,71 @@ jobs: TARGET_IMAGE: t5x TARGET_TAGS: | type=raw,value=mealkit,priority=500 - type=raw,value=mealkit-${{ needs.metadata.outputs.BUILD_DATE }},priority=500 - - publish-final: - needs: [metadata, amd64, arm64] - if: needs.metadata.outputs.PUBLISH == 'true' - uses: ./.github/workflows/_publish_container.yaml - with: - SOURCE_IMAGE: | - ${{ needs.amd64.outputs.DOCKER_TAG_FINAL }} - ${{ needs.arm64.outputs.DOCKER_TAG_FINAL }} - TARGET_IMAGE: t5x - TARGET_TAGS: | - type=raw,value=latest,priority=1000 - type=raw,value=nightly-${{ needs.metadata.outputs.BUILD_DATE }},priority=900 + type=raw,value=mealkit-${{ needs.metadata.outputs.BUILD_DATE }},priority=500 - test-unit: - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch' - needs: [metadata, amd64, arm64] + test-unit-amd64: + needs: amd64 uses: ./.github/workflows/_test_rosetta.yaml with: ROSETTA_IMAGE: ${{ needs.amd64.outputs.DOCKER_TAG_FINAL }} secrets: inherit - test-t5x: - needs: [metadata, amd64, arm64] + test-t5x-amd64: + needs: amd64 uses: ./.github/workflows/_test_t5x_rosetta.yaml - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch' with: T5X_IMAGE: ${{ needs.amd64.outputs.DOCKER_TAG_FINAL }} secrets: inherit - test-vit: - needs: [metadata, amd64, arm64] + test-vit-amd64: + needs: amd64 uses: ./.github/workflows/_test_vit.yaml - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch' with: ROSETTA_T5X_IMAGE: ${{ needs.amd64.outputs.DOCKER_TAG_FINAL }} secrets: inherit + publish-final: + needs: [metadata, amd64, arm64, test-t5x-amd64, test-vit-amd64, test-unit-amd64] + if: needs.metadata.outputs.PUBLISH == 'true' + uses: ./.github/workflows/_publish_container.yaml + with: + SOURCE_IMAGE: | + ${{ needs.amd64.outputs.DOCKER_TAG_FINAL }} + ${{ needs.arm64.outputs.DOCKER_TAG_FINAL }} + TARGET_IMAGE: t5x + TARGET_TAGS: | + ${{ ( needs.test-t5x-amd64.outputs.TEST_STATUS == 'success' && needs.test-vit-amd64.outputs.TEST_STATUS == 'success' && needs.test-unit-amd64.outputs.TEST_STATUS == 'success' ) && 'type=raw,value=latest,priority=1000' || '' }} + type=raw,value=nightly-${{ needs.metadata.outputs.BUILD_DATE }},priority=900 + publish-t5x: - needs: [metadata, test-t5x, test-vit] + needs: [metadata, test-t5x-amd64, test-vit-amd64] uses: ./.github/workflows/_publish_t5x_results.yaml - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch' with: BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }} EXPERIMENT_SUBDIR: ROSETTA_T5X secrets: inherit - publish-test: - needs: [metadata, amd64, arm64, test-unit, test-t5x, test-vit] + publish-test-badge: + needs: [metadata, publish-build-badge, test-unit-amd64, test-t5x-amd64, test-vit-amd64] uses: ./.github/workflows/_publish_badge.yaml - if: ( always() ) + if: always() secrets: inherit with: ENDPOINT_FILENAME: 'rosetta-t5x-overall-test-status.json' - PUBLISH: ${{ github.event_name == 'workflow_run' || needs.metadata.outputs.PUBLISH == 'true' }} + PUBLISH: ${{ needs.metadata.outputs.PUBLISH == 'true' }} SCRIPT: | - UNIT_STATUS=${{ needs.test-unit.outputs.TEST_STATUS }} - T5X_STATUS=${{ needs.test-t5x.outputs.TEST_STATUS }} - VIT_STATUS=${{ needs.test-vit.outputs.TEST_STATUS }} + UNIT_STATUS=${{ needs.test-unit-amd64.outputs.TEST_STATUS }} + T5X_STATUS=${{ needs.test-t5x-amd64.outputs.TEST_STATUS }} + VIT_STATUS=${{ needs.test-vit-amd64.outputs.TEST_STATUS }} echo "LABEL='Tests'" >> $GITHUB_OUTPUT - if [[ ${{ needs.amd64.result }} == "success" && ${{ needs.arm64.result }} == "success" ]]; then + STATUS=failure + if [[ ${{ needs.publish-build-badge.outputs.STATUS }} == "success" ]]; then if [[ $UNIT_STATUS == "success" ]] && [[ $T5X_STATUS == "success" ]] && [[ $VIT_STATUS == "success" ]]; then COLOR=brightgreen MESSAGE="Unit passed / MGMN passed" + STATUS=success elif [[ $UNIT_STATUS == "success" ]]; then COLOR=yellow MESSAGE="Unit passed / MGMN failed" @@ -190,18 +218,18 @@ jobs: MESSAGE="Unit failed / MGMN failed" fi else - MESSAGE="n/a" COLOR="red" + MESSAGE="n/a" fi echo "MESSAGE='${MESSAGE}'" >> $GITHUB_OUTPUT echo "COLOR='${COLOR}'" >> $GITHUB_OUTPUT + echo "STATUS='${STATUS}'" >> ${GITHUB_OUTPUT} finalize: if: always() - needs: [metadata, amd64, arm64] + needs: [metadata, amd64, arm64, test-t5x-amd64, test-vit-amd64, test-unit-amd64] uses: ./.github/workflows/_finalize.yaml with: PUBLISH_BADGE: ${{ needs.metadata.outputs.PUBLISH == 'true' }} secrets: inherit - diff --git a/.github/workflows/nightly-t5x-build.yaml b/.github/workflows/nightly-t5x-build.yaml index 8f0ad277f..f023826d5 100644 --- a/.github/workflows/nightly-t5x-build.yaml +++ b/.github/workflows/nightly-t5x-build.yaml @@ -65,6 +65,7 @@ jobs: needs: metadata runs-on: ubuntu-22.04 outputs: + DOCKER_TAG_FINAL: '' DOCKER_TAG_MEALKIT: '' steps: - name: Generate placeholder warning diff --git a/rosetta/Dockerfile.pax b/rosetta/Dockerfile.pax index b03e1ffd8..489bfd568 100644 --- a/rosetta/Dockerfile.pax +++ b/rosetta/Dockerfile.pax @@ -35,7 +35,7 @@ ARG PRAXIS_PATCHLIST COPY --from=rosetta-source / /opt/rosetta WORKDIR /opt/rosetta RUN --mount=target=/opt/pax-mirror,from=pax-mirror-source,readwrite \ - --mount=target=/opt/praxis-mirror,from=praxis-mirror-source,readwrite <> /opt/pip-tools.d/manifest.t5x echo "-e file:///opt/rosetta" >> /opt/pip-tools.d/manifest.t5x EOF diff --git a/rosetta/README.md b/rosetta/README.md index 7e0baa98e..c91a4ed3f 100644 --- a/rosetta/README.md +++ b/rosetta/README.md @@ -8,13 +8,20 @@ LLM, CV, and multimodal models. ```bash ROSETTA_BASE=t5x # or pax -docker buildx build --target rosetta --tag rosetta:latest -f Dockerfile.${ROSETTA_BASE} . - -# If you want a devel image with test dependencies -docker buildx build --target rosetta-devel --tag rosetta-devel:latest -f Dockerfile.${ROSETTA_BASE} . +docker buildx build --tag rosetta:latest -f Dockerfile.${ROSETTA_BASE} . # If you want to specify a specific base image -docker buildx build --target rosetta --tag rosetta:latest -f Dockerfile.${ROSETTA_BASE} --build-arg BASE_IMAGE=ghcr.io/nvidia/${ROSETTA_BASE}:nightly-2023-05-01 . +docker buildx build --tag rosetta:latest -f Dockerfile.${ROSETTA_BASE} --build-arg BASE_IMAGE=ghcr.io/nvidia/${ROSETTA_BASE}:mealkit-YYYY-MM-DD . +``` + +### Advanced use-cases +```sh +# [T5x Example] If you want to build with a different patchlist (patchlist must be relative to rosetta dir) +docker buildx build --build-arg T5X_PATCHLIST=patches/t5x/patchlist-t5x.txt.gen --build-arg FLAX_PATCHLIST=patches/flax/patchlist-flax.txt.gen --target rosetta --tag rosetta:latest -f Dockerfile.t5x . + +# [T5x Example] If you want to build with patches from another image +scripts/extract-patches.sh # Extracts generated patch dir under ./patches/ +docker buildx build --build-arg T5X_PATCHLIST=patches/t5x/patchlist-t5x.txt.gen --build-arg FLAX_PATCHLIST=patches/flax/patchlist-flax.txt.gen --target rosetta --tag rosetta:latest -f Dockerfile.t5x . ``` ## Development diff --git a/rosetta/create-distribution.sh b/rosetta/create-distribution.sh index 1eb99d111..a4db14ef3 100755 --- a/rosetta/create-distribution.sh +++ b/rosetta/create-distribution.sh @@ -14,6 +14,9 @@ Usage: $0 [OPTION]... -p, --patchlist=PATH Path to patchlist.txt with feature PRs -r, --ref=REF Git commit hash or tag name that specifies the base of the t5x distribution. Defaults to main (not origin/main) +A patchlist will be generated by this script and placed under $SCRIPT_DIR/patches/ with all entries +replaced with local patches. + Relationship between --dir, --extra-dir, and --mirror-url repo args: --dir: The upstream repo, locally cloned --mirror-url: A mirror of the upstream repo @@ -21,6 +24,8 @@ Relationship between --dir, --extra-dir, and --mirror-url repo args: Patches in the --patchlist will be applied from the repos above according to the following rules: + Local patches (relative to this file): + * ^file://.* --dir: * ^pull/.* --mirror-url: @@ -84,6 +89,13 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) INSTALLED_DIR=${INSTALLED_DIR:-/opt/t5x} DISTRIBUTION_BASE_REF=${DISTRIBUTION_BASE_REF:-HEAD} MIRROR_GIT_URL=${MIRROR_GIT_URL:-https://github.com/nvjax-svc-0/t5x.git} +GEN_PATCH_DIR=${GEN_PATCH_DIR:-$SCRIPT_DIR/patches/$(basename $(git -C $INSTALLED_DIR remote get-url origin) .git)} +GEN_PATCH_LIST=$GEN_PATCH_DIR/$(basename $PATCH_LIST).gen +if [[ -e $GEN_PATCH_LIST ]]; then + echo "[WARNING]: $GEN_PATCH_LIST exists and will be overwritten" + rm -f $GEN_PATCH_LIST +fi +mkdir -p $GEN_PATCH_DIR if [[ -z "${INSTALLED_DIR}" ]]; then echo "[ERROR]: Need to specify -d/--dir" @@ -177,6 +189,30 @@ fork-point() { merge_commit=$(git rev-list --ancestry-path ${feat_branch}..${main} | tail -n1) git merge-base ${merge_commit}^ ${feat_branch}^ } +# git-am + adds to generated patchlist +am+record() { + # Canonicalize path to remove extra slashes or dot syntax + patch_path=$(readlink -f $1) + if [[ ! $patch_path =~ ^${SCRIPT_DIR} ]]; then + echo "[ERROR]: patch_path=$patch_path should start with $SCRIPT_DIR" + exit 1 + fi + # Apply the patch + git am --3way <$patch_path || ret_code=$? + if [[ ${ret_code:-0} -ne 0 ]]; then + cat <> $GEN_PATCH_LIST +} apply-patches() { from=$1 to=$2 @@ -185,19 +221,22 @@ apply-patches() { if [[ $num_merge_commits -gt 0 ]]; then echo "[WARNING] There are merge commits between ${from}..${to}. Linearizing history before cherry-picking to remove merge-commits" >&2 # Make a tmp branch for the linear history - git checkout -b tmp-linear-tmp $to + to_linear=${to}.linearized + git checkout -b ${to_linear} $to # This will create a linear history git rebase $from # switch back to the rosetta-distribution branch git checkout - - to=tmp-linear-tmp + to=${to_linear} fi - git cherry-pick ${from}..${to} - ret_code=$? - if [[ $to == tmp-linear-tmp ]]; then - git branch -D tmp-linear-tmp + # Make the patch + patch_fname=$(tr '/' '-' <<< "$to").patch + git format-patch --stdout ${from}..${to} >$GEN_PATCH_DIR/$patch_fname + if [[ -n "${to_linear:-}" ]]; then + git branch -D ${to_linear} fi - return $ret_code + # Apply the patch + am+record $GEN_PATCH_DIR/$patch_fname } MIRROR_REMOTE_NAME=mirror if git remote show ${MIRROR_REMOTE_NAME} &>/dev/null; then @@ -210,7 +249,15 @@ for line in $(cat ${PATCH_LIST}); do continue fi git_ref=$(awk '{print $1}' <<< "${line}") - if [[ "${git_ref}" =~ ^pull/ ]]; then + if [[ "${git_ref}" =~ ^file:// ]]; then + patch_path=$SCRIPT_DIR/${git_ref#file://} + if [[ ! -f $patch_path ]]; then + echo "[ERROR]: ${git_ref} refers to $patch_path which does not exist" + exit 1 + fi + am+record $patch_path + continue + elif [[ "${git_ref}" =~ ^pull/ ]]; then REMOTE_NAME=origin PR_ID=$(cut -d/ -f2 <<<"${git_ref}") branch=PR-${PR_ID} @@ -229,7 +276,7 @@ for line in $(cat ${PATCH_LIST}); do main_branch=${REMOTE_NAME}/main else if [[ -z "${EXTRA_DIR+x}" ]] || [[ ! -d ${EXTRA_DIR} ]]; then - echo "[WARNING]: EXTRA_DIR=${EXTRA_DIR} does not exist so cannot cherry-pick ${git_ref}" + echo "[WARNING]: EXTRA_DIR=${EXTRA_DIR} does not exist so cannot apply patch=${git_ref}" continue fi REMOTE_NAME=${EXTRA_REMOTE_NAME} @@ -239,9 +286,8 @@ for line in $(cat ${PATCH_LIST}); do main_branch=${REMOTE_NAME}/main${TMP_BRANCH_SUFFIX} fi fork_point=$(fork-point ${main_branch} ${branch}) - ret_code=0 apply-patches ${fork_point} ${branch} || ret_code=$? - if [[ ${ret_code} -ne 0 ]]; then + if [[ ${ret_code:-0} -ne 0 ]]; then cat < /dev/null && pwd ) +cd $SCRIPT_DIR + +if [[ $# -lt 1 || $# -gt 2 ]]; then + echo "Copies the patches from within an image to the GIT_ROOT/rosetta/patches dir" + echo + echo "Usage: $0 " + exit 1 +fi + +IMAGE=$1 +ROSETTA_DIR=${2:-$(readlink -f ../)} + +container_id=$(docker create $IMAGE) +docker cp $container_id:/opt/rosetta/patches $ROSETTA_DIR +docker rm -v $container_id