Add Big models pipeline (#19222)
### Description
Two models are added to CI.
The Stable Diffusion stage is based on
https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md

The Llama2 FP16 stage is based on https://github.com/microsoft/Llama-2-Onnx.
12 GB of GPU memory is not enough for this model, so the T4 pool (16 GB) is used to run it.

### Motivation and Context
Add a regular E2E test for big models.
It is triggered in the main build, that is, it runs after a PR is merged (see the trigger sketch below).

More models will be added later.
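
For context, a pipeline-completion trigger of roughly this shape makes a pipeline run after the main build finishes. This is a minimal sketch only: the actual trigger for this pipeline is configured in its definition, and the `source` name below is a placeholder, not the real upstream pipeline name.

```yaml
# Sketch: run this pipeline when the upstream main build completes.
trigger: none                        # no per-push / per-PR runs of this pipeline
resources:
  pipelines:
  - pipeline: mainBuild              # local alias (placeholder)
    source: 'Linux GPU Main Build'   # placeholder upstream pipeline name
    trigger:
      branches:
        include:
        - main
```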

### Test Runs

https://dev.azure.com/onnxruntime/onnxruntime/_build/results?buildId=1275191&view=results
mszhanyi authored Jan 22, 2024
1 parent 8d9d751 commit 780acda
Showing 1 changed file with 259 additions and 0 deletions.
tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml
@@ -0,0 +1,259 @@
# reference: https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md
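# The two parameters let a manual run reuse artifacts from a previous pipeline run:
# set specificArtifact to true and BuildId to the run whose artifacts should be downloaded.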
parameters:
- name: specificArtifact
  displayName: Use Specific Artifact
  type: boolean
  default: false
- name: BuildId
  displayName: Specific Artifact's RunId
  type: number
  default: 0

resources:
  repositories:
  - repository: manylinux
    type: Github
    endpoint: Microsoft
    name: pypa/manylinux
    ref: 5eda9aded5462201e6310105728d33016e637ea7

  - repository: LLaMa2Onnx
    type: Github
    endpoint: Microsoft
    name: Microsoft/Llama-2-Onnx
    ref: main
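  # manylinux is pinned to a fixed commit for reproducible image builds;
  # the Llama-2-Onnx repo is checked out by the Llama2_ONNX_FP16 stage below.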

variables:
- template: templates/common-variables.yml
- name: docker_base_image
  value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
- name: linux_trt_version
  value: 8.6.1.6-1.cuda11.8
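# CUDA 11.8 base image and the matching TensorRT version; both are passed
# to the docker image build below as build args.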

stages:
- stage: Build_Onnxruntime_Cuda
  jobs:
  - job: Linux_Build
    timeoutInMinutes: 120
    variables:
      skipComponentGovernanceDetection: true
      CCACHE_DIR: $(Pipeline.Workspace)/ccache
    workspace:
      clean: all
    pool: onnxruntime-Ubuntu2204-AMD-CPU
    steps:
    - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
      displayName: 'Clean Agent Directories'
      condition: always()

    - checkout: self
      clean: true
      submodules: none

    - template: templates/get-docker-image-steps.yml
      parameters:
        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
        Context: tools/ci_build/github/linux/docker
        DockerBuildArgs: "
          --network=host
          --build-arg BASEIMAGE=$(docker_base_image)
          --build-arg TRT_VERSION=$(linux_trt_version)
          --build-arg BUILD_UID=$( id -u )
          "
        Repository: onnxruntimecuda11build

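    # Restore the compiler cache: exact key first, then fall back to the
    # branch-level and global ccache entries via restoreKeys.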
    - task: Cache@2
      inputs:
        key: '"ccache" | "$(Build.SourceBranch)" | "$(Build.SourceVersion)"'
        path: $(CCACHE_DIR)
        restoreKeys: |
          "ccache" | "$(Build.SourceBranch)"
          "ccache"
        cacheHitVar: CACHE_RESTORED
      displayName: Cache Task

    - script: |
        sudo mkdir -p $(Pipeline.Workspace)/ccache
      condition: ne(variables.CACHE_RESTORED, 'true')
      displayName: Create Cache Dir
    - task: CmdLine@2
      inputs:
        script: |
          mkdir -p $HOME/.onnx
          docker run -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" --rm \
            --volume /data/onnx:/data/onnx:ro \
            --volume $(Build.SourcesDirectory):/onnxruntime_src \
            --volume $(Build.BinariesDirectory):/build \
            --volume /data/models:/build/models:ro \
            --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
            --volume $(Pipeline.Workspace)/ccache:/cache \
            -e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
            -e NIGHTLY_BUILD \
            -e BUILD_BUILDNUMBER \
            -e CCACHE_DIR=/cache \
            onnxruntimecuda11build \
            /bin/bash -c "
              set -ex; \
              env; \
              ccache -s; \
              /opt/python/cp38-cp38/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
                --build_dir /build --cmake_generator Ninja \
                --config Release --update --build \
                --skip_submodule_sync \
                --build_shared_lib \
                --parallel \
                --build_wheel \
                --enable_onnx_tests --use_cuda --cuda_version=${{variables.common_cuda_version}} --cuda_home=/usr/local/cuda-${{variables.common_cuda_version}} --cudnn_home=/usr/local/cuda-${{variables.common_cuda_version}} \
                --enable_cuda_profiling --enable_cuda_nhwc_ops \
                --enable_pybind --build_java \
                --use_cache \
                --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=75;86' ; \
              ccache -sv; \
              ccache -z"
      workingDirectory: $(Build.SourcesDirectory)

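    # Trim the build tree before publishing: drop the cloned sources, pybind11,
    # the models link, and everything under _deps except onnx-src, then record
    # which files are executable in perms.txt.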
    - task: CmdLine@2
      inputs:
        script: |
          rm -rf $(Build.BinariesDirectory)/Release/onnxruntime $(Build.BinariesDirectory)/Release/pybind11
          rm -f $(Build.BinariesDirectory)/Release/models
          find $(Build.BinariesDirectory)/Release/_deps -mindepth 1 ! -regex '^$(Build.BinariesDirectory)/Release/_deps/onnx-src\(/.*\)?' -delete
          cd $(Build.BinariesDirectory)/Release
          find -executable -type f > $(Build.BinariesDirectory)/Release/perms.txt
    - script: |
        set -ex
        mkdir -p $(Agent.TempDirectory)/ort
        cp $(Build.BinariesDirectory)/Release/dist/*.whl $(Agent.TempDirectory)/ort/
      displayName: 'Copy Wheels'
    - task: PublishPipelineArtifact@0
      displayName: 'Publish Pipeline Artifact'
      inputs:
        artifactName: 'drop-ort-linux-gpu'
        targetPath: '$(Agent.TempDirectory)/ort'

    - template: templates/explicitly-defined-final-tasks.yml

- stage: Stable_Diffusion
  dependsOn:
  - Build_Onnxruntime_Cuda
  jobs:
  - job: Stable_Diffusion
    variables:
      skipComponentGovernanceDetection: true
      CCACHE_DIR: $(Pipeline.Workspace)/ccache
    workspace:
      clean: all
    pool: onnxruntime-Linux-GPU-A10-12G
    steps:
    - checkout: self
      clean: true
      submodules: none

    - template: templates/flex-downloadPipelineArtifact.yml
      parameters:
        StepName: 'Download Onnxruntime Artifact'
        ArtifactName: 'drop-ort-linux-gpu'
        TargetPath: '$(Build.BinariesDirectory)/Release'
        SpecificArtifact: ${{ parameters.specificArtifact }}
        BuildId: ${{ parameters.BuildId }}

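    # Run the Stable Diffusion demos inside the NGC PyTorch container,
    # installing the wheel produced by the Build_Onnxruntime_Cuda stage.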
    - script: |
        docker run --rm --gpus all -v $PWD:/workspace -v $(Build.BinariesDirectory)/Release:/Release nvcr.io/nvidia/pytorch:22.11-py3 \
          bash -c "
            set -ex; \
            python3 --version; \
            python3 -m pip install --upgrade pip; \
            python3 -m pip install /Release/*.whl; \
            pushd /workspace/onnxruntime/python/tools/transformers/models/stable_diffusion; \
            python3 -m pip install -r requirements-cuda11.txt; \
            python3 -m pip install --upgrade polygraphy onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com; \
            echo Generate an image guided by a text prompt; \
            python3 demo_txt2img.py 'astronaut riding a horse on mars'; \
            echo Generate an image with Stable Diffusion XL guided by a text prompt; \
            python3 demo_txt2img_xl.py 'starry night over Golden Gate Bridge by van gogh'; \
            python3 demo_txt2img_xl.py --enable-refiner 'starry night over Golden Gate Bridge by van gogh'; \
            echo Generate an image guided by a text prompt using LCM LoRA; \
            python3 demo_txt2img_xl.py --scheduler LCM --lora-weights latent-consistency/lcm-lora-sdxl --denoising-steps 4 'Self-portrait oil painting, a beautiful cyborg with golden hair, 8k'; \
            popd; \
          "
      displayName: 'Run stable diffusion demo'
      workingDirectory: $(Build.SourcesDirectory)
- stage: Llama2_ONNX_FP16
  dependsOn:
  - Build_Onnxruntime_Cuda
  jobs:
  - job: Llama2_ONNX_FP16
    variables:
      skipComponentGovernanceDetection: true
    workspace:
      clean: all
    pool: onnxruntime-Linux-GPU-T4
    steps:
    - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
      displayName: 'Clean Agent Directories'
      condition: always()

    - checkout: self
      clean: true
      submodules: none

    - checkout: LLaMa2Onnx
      clean: true
      submodules: none

    - template: templates/flex-downloadPipelineArtifact.yml
      parameters:
        StepName: 'Download Onnxruntime Artifact'
        ArtifactName: 'drop-ort-linux-gpu'
        TargetPath: '$(Build.BinariesDirectory)/ort-artifact/'
        SpecificArtifact: ${{ parameters.specificArtifact }}
        BuildId: ${{ parameters.BuildId }}

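    # Fetch the Llama2 FP16 ONNX model from an internal Universal Packages feed;
    # the GUIDs below identify the feed and the package definition.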
    - task: DownloadPackage@1
      displayName: 'Download Llama2 model'
      inputs:
        packageType: upack
        feed: '/7424c8e4-5c62-490e-95c4-79446f31017c'
        version: 1.0.0
        definition: '772ebce3-7e06-46d5-b3cc-82040ec4b2ce'
        downloadPath: $(Agent.TempDirectory)/llama2_onnx_ft16

    - template: templates/get-docker-image-steps.yml
      parameters:
        Dockerfile: onnxruntime/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda11_8_tensorrt8_6
        Context: onnxruntime/tools/ci_build/github/linux/docker/
        ScriptName: onnxruntime/tools/ci_build/get_docker_image.py
        DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )"
        Repository: onnxruntimeubi8packagestest
        UpdateDepsTxt: false

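    # Run the Llama-2-Onnx minimum example against the downloaded FP16 model,
    # capturing the generated answer for the check step below.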
    - script: |
        docker run --rm --gpus all -v $(Build.SourcesDirectory)/Llama-2-Onnx:/workspace \
          -v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
          -v $(Agent.TempDirectory)/llama2_onnx_ft16:/models \
          onnxruntimeubi8packagestest \
          bash -c "
            set -ex; \
            python3 -m pip install --upgrade pip ; \
            python3 -m pip install /ort-artifact/*.whl ; \
            python3 -m pip install torch --index-url https://download.pytorch.org/whl/cu118 ; \
            python3 -m pip install sentencepiece ; \
            pushd /workspace ; \
            python3 MinimumExample/Example_ONNX_LlamaV2.py --onnx_file /models/ONNX/LlamaV2_7B_FT_float16.onnx \
              --embedding_file /models/embeddings.pth --tokenizer_path tokenizer.model --prompt 'What is the lightest element?' > /workspace/answer.txt ; \
            popd ; \
          "
      displayName: 'Run Llama2 demo'
      workingDirectory: $(Build.SourcesDirectory)
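    # Compare the demo output (non-printable characters stripped) with the
    # expected answer verbatim; any mismatch fails the step.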
    - script: |
        set -ex
        real=$(cat $(Build.SourcesDirectory)/Llama-2-Onnx/answer.txt)
        trim_actual=$(tr -dc '[[:print:]]' <<< "$real")
        expected="The lightest element is hydrogen. Hydrogen is the lightest element on the periodic table, with an atomic mass of 1.00794 u (unified atomic mass units)."
        [ "$expected" == "$trim_actual" ] && exit 0 || exit 1
      displayName: 'Check result'
