### Description
Two models are added to CI. The Stable Diffusion stage is based on https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md. The Llama2 FP16 stage is based on https://github.com/microsoft/Llama-2-Onnx; 12 GB of GPU memory is not enough for it, so I chose a T4 to run it.

### Motivation and Context
Add a regular E2E test for big models. It is triggered in the main build, i.e., it runs after a PR is merged. More models will be added later.

### Test Runs
https://dev.azure.com/onnxruntime/onnxruntime/_build/results?buildId=1275191&view=results
Showing 1 changed file with 259 additions and 0 deletions.
tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml
# reference: https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md
parameters:
- name: specificArtifact
  displayName: Use Specific Artifact
  type: boolean
  default: false
- name: BuildId
  displayName: Specific Artifact's RunId
  type: number
  default: 0

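# External repositories used by this pipeline: manylinux is pinned to a fixed
# commit, while Llama-2-Onnx tracks main.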
resources:
  repositories:
  - repository: manylinux
    type: Github
    endpoint: Microsoft
    name: pypa/manylinux
    ref: 5eda9aded5462201e6310105728d33016e637ea7

  - repository: LLaMa2Onnx
    type: Github
    endpoint: Microsoft
    name: Microsoft/Llama-2-Onnx
    ref: main

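# Version pins shared by the stages below: the CUDA 11.8 base image and the
# matching TensorRT 8.6 build.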
variables:
  - template: templates/common-variables.yml
  - name: docker_base_image
    value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
  - name: linux_trt_version
    value: 8.6.1.6-1.cuda11.8

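# Stage 1: build the CUDA-enabled Python wheel once on a CPU pool; both model
# stages below consume it as the 'drop-ort-linux-gpu' pipeline artifact.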
stages:
- stage: Build_Onnxruntime_Cuda
  jobs:
  - job: Linux_Build
    timeoutInMinutes: 120
    variables:
      skipComponentGovernanceDetection: true
      CCACHE_DIR: $(Pipeline.Workspace)/ccache
    workspace:
      clean: all
    pool: onnxruntime-Ubuntu2204-AMD-CPU
    steps:
    - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
      displayName: 'Clean Agent Directories'
      condition: always()

    - checkout: self
      clean: true
      submodules: none

    - template: templates/get-docker-image-steps.yml
      parameters:
        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
        Context: tools/ci_build/github/linux/docker
        DockerBuildArgs: "
          --network=host
          --build-arg BASEIMAGE=$(docker_base_image)
          --build-arg TRT_VERSION=$(linux_trt_version)
          --build-arg BUILD_UID=$( id -u )
          "
        Repository: onnxruntimecuda11build

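    # ccache cache: restore by exact source version first, then fall back to
    # the branch-level and finally the global entry.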
    - task: Cache@2
      inputs:
        key: '"ccache" | "$(Build.SourceBranch)" | "$(Build.SourceVersion)"'
        path: $(CCACHE_DIR)
        restoreKeys: |
          "ccache" | "$(Build.SourceBranch)"
          "ccache"
        cacheHitVar: CACHE_RESTORED
      displayName: Cache Task

    - script: |
        sudo mkdir -p $(Pipeline.Workspace)/ccache
      condition: ne(variables.CACHE_RESTORED, 'true')
      displayName: Create Cache Dir
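    # Build the Release wheel inside the manylinux CUDA container; ccache is
    # mounted at /cache so incremental builds can reuse prior compilations.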
    - task: CmdLine@2
      inputs:
        script: |
          mkdir -p $HOME/.onnx
          docker run -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" --rm \
            --volume /data/onnx:/data/onnx:ro \
            --volume $(Build.SourcesDirectory):/onnxruntime_src \
            --volume $(Build.BinariesDirectory):/build \
            --volume /data/models:/build/models:ro \
            --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
            --volume $(Pipeline.Workspace)/ccache:/cache \
            -e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
            -e NIGHTLY_BUILD \
            -e BUILD_BUILDNUMBER \
            -e CCACHE_DIR=/cache \
            onnxruntimecuda11build \
            /bin/bash -c "
              set -ex; \
              env; \
              ccache -s; \
              /opt/python/cp38-cp38/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
                --build_dir /build --cmake_generator Ninja \
                --config Release --update --build \
                --skip_submodule_sync \
                --build_shared_lib \
                --parallel \
                --build_wheel \
                --enable_onnx_tests --use_cuda --cuda_version=${{variables.common_cuda_version}} --cuda_home=/usr/local/cuda-${{variables.common_cuda_version}} --cudnn_home=/usr/local/cuda-${{variables.common_cuda_version}} \
                --enable_cuda_profiling --enable_cuda_nhwc_ops \
                --enable_pybind --build_java \
                --use_cache \
                --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=75;86' ; \
              ccache -sv; \
              ccache -z"
      workingDirectory: $(Build.SourcesDirectory)

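    # Prune intermediate build output; only the Python wheel is copied out and
    # published for the model stages.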
    - task: CmdLine@2
      inputs:
        script: |
          rm -rf $(Build.BinariesDirectory)/Release/onnxruntime $(Build.BinariesDirectory)/Release/pybind11
          rm -f $(Build.BinariesDirectory)/Release/models
          find $(Build.BinariesDirectory)/Release/_deps -mindepth 1 ! -regex '^$(Build.BinariesDirectory)/Release/_deps/onnx-src\(/.*\)?' -delete
          cd $(Build.BinariesDirectory)/Release
          find -executable -type f > $(Build.BinariesDirectory)/Release/perms.txt

    - script: |
        set -ex
        mkdir -p $(Agent.TempDirectory)/ort
        cp $(Build.BinariesDirectory)/Release/dist/*.whl $(Agent.TempDirectory)/ort/
      displayName: 'Copy Wheels'

    - task: PublishPipelineArtifact@0
      displayName: 'Publish Pipeline Artifact'
      inputs:
        artifactName: 'drop-ort-linux-gpu'
        targetPath: '$(Agent.TempDirectory)/ort'

    - template: templates/explicitly-defined-final-tasks.yml

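# Stage 2: Stable Diffusion demos from the transformers tools, run on an A10
# (12 GB) pool against the freshly built wheel.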
- stage: Stable_Diffusion
  dependsOn:
  - Build_Onnxruntime_Cuda
  jobs:
  - job: Stable_Diffusion
    variables:
      skipComponentGovernanceDetection: true
      CCACHE_DIR: $(Pipeline.Workspace)/ccache
    workspace:
      clean: all
    pool: onnxruntime-Linux-GPU-A10-12G
    steps:
    - checkout: self
      clean: true
      submodules: none

    - template: templates/flex-downloadPipelineArtifact.yml
      parameters:
        StepName: 'Download Onnxruntime Artifact'
        ArtifactName: 'drop-ort-linux-gpu'
        TargetPath: '$(Build.BinariesDirectory)/Release'
        SpecificArtifact: ${{ parameters.specificArtifact }}
        BuildId: ${{ parameters.BuildId }}

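    # Run the demos inside NVIDIA's PyTorch container: install the built wheel
    # and the CUDA 11 requirements, then exercise txt2img, SDXL with and
    # without the refiner, and the LCM LoRA path.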
    - script: |
        docker run --rm --gpus all -v $PWD:/workspace -v $(Build.BinariesDirectory)/Release:/Release nvcr.io/nvidia/pytorch:22.11-py3 \
          bash -c "
            set -ex; \
            python3 --version; \
            python3 -m pip install --upgrade pip; \
            python3 -m pip install /Release/*.whl; \
            pushd /workspace/onnxruntime/python/tools/transformers/models/stable_diffusion; \
            python3 -m pip install -r requirements-cuda11.txt; \
            python3 -m pip install --upgrade polygraphy onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com; \
            echo Generate an image guided by a text prompt; \
            python3 demo_txt2img.py \"astronaut riding a horse on mars\"; \
            echo Generate an image with Stable Diffusion XL guided by a text prompt; \
            python3 demo_txt2img_xl.py 'starry night over Golden Gate Bridge by van gogh'; \
            python3 demo_txt2img_xl.py --enable-refiner 'starry night over Golden Gate Bridge by van gogh'; \
            echo Generate an image guided by a text prompt using LCM LoRA; \
            python3 demo_txt2img_xl.py --scheduler LCM --lora-weights latent-consistency/lcm-lora-sdxl --denoising-steps 4 \"Self-portrait oil painting, a beautiful cyborg with golden hair, 8k\"; \
            popd; \
          "
      displayName: 'Run stable diffusion demo'
      workingDirectory: $(Build.SourcesDirectory)
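
# Stage 3: Llama2 FP16 minimum example. Per the PR description, 12 GB of GPU
# memory is not enough for this model, so it runs on the T4 (16 GB) pool.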
- stage: Llama2_ONNX_FP16
  dependsOn:
  - Build_Onnxruntime_Cuda
  jobs:
  - job: Llama2_ONNX_FP16
    variables:
      skipComponentGovernanceDetection: true
    workspace:
      clean: all
    pool: onnxruntime-Linux-GPU-T4
    steps:
    - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
      displayName: 'Clean Agent Directories'
      condition: always()

    - checkout: self
      clean: true
      submodules: none

    - checkout: LLaMa2Onnx
      clean: true
      submodules: none

    - template: templates/flex-downloadPipelineArtifact.yml
      parameters:
        StepName: 'Download Onnxruntime Artifact'
        ArtifactName: 'drop-ort-linux-gpu'
        TargetPath: '$(Build.BinariesDirectory)/ort-artifact/'
        SpecificArtifact: ${{ parameters.specificArtifact }}
        BuildId: ${{ parameters.BuildId }}

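    # The FP16 Llama2 ONNX model is fetched as a Universal Package from an
    # internal Azure Artifacts feed (GUID-addressed feed and definition).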
    - task: DownloadPackage@1
      displayName: 'Download Llama2 model'
      inputs:
        packageType: upack
        feed: '/7424c8e4-5c62-490e-95c4-79446f31017c'
        version: 1.0.0
        definition: '772ebce3-7e06-46d5-b3cc-82040ec4b2ce'
        downloadPath: $(Agent.TempDirectory)/llama2_onnx_ft16

    - template: templates/get-docker-image-steps.yml
      parameters:
        Dockerfile: onnxruntime/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda11_8_tensorrt8_6
        Context: onnxruntime/tools/ci_build/github/linux/docker/
        ScriptName: onnxruntime/tools/ci_build/get_docker_image.py
        DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )"
        Repository: onnxruntimeubi8packagestest
        UpdateDepsTxt: false

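    # Run the Llama-2-Onnx minimum example in the UBI8 CUDA/TensorRT test
    # image; the model's answer is captured to answer.txt for the check below.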
    - script: |
        docker run --rm --gpus all -v $(Build.SourcesDirectory)/Llama-2-Onnx:/workspace \
          -v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
          -v $(Agent.TempDirectory)/llama2_onnx_ft16:/models \
          onnxruntimeubi8packagestest \
          bash -c "
            set -ex; \
            python3 -m pip install --upgrade pip ; \
            python3 -m pip install /ort-artifact/*.whl ; \
            python3 -m pip install torch --index-url https://download.pytorch.org/whl/cu118 ; \
            python3 -m pip install sentencepiece ; \
            pushd /workspace ; \
            python3 MinimumExample/Example_ONNX_LlamaV2.py --onnx_file /models/ONNX/LlamaV2_7B_FT_float16.onnx \
              --embedding_file /models/embeddings.pth --tokenizer_path tokenizer.model --prompt 'What is the lightest element?' > /workspace/answer.txt ; \
            popd ; \
          "
      displayName: 'Run Llama2 demo'
      workingDirectory: $(Build.SourcesDirectory)
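
    # Exact-match check: strip non-printable characters from the captured
    # output and compare it to the expected answer; any mismatch fails the job.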
    - script: |
        set -ex
        real=$(cat $(Build.SourcesDirectory)/Llama-2-Onnx/answer.txt)
        trim_actual=$(tr -dc '[[:print:]]' <<< "$real")
        expected="The lightest element is hydrogen. Hydrogen is the lightest element on the periodic table, with an atomic mass of 1.00794 u (unified atomic mass units)."
        [ "$expected" == "$trim_actual" ] && exit 0 || exit 1
      displayName: 'Check result'