Skip to content

Commit

Permalink
Add FP32 and INT4 test in Llama2 (#21187)
Browse files Browse the repository at this point in the history
### Description
<!-- Describe your changes. -->



### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
  • Loading branch information
mszhanyi authored Jun 27, 2024
1 parent d1ab94c commit 587e92c
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 16 deletions.
67 changes: 51 additions & 16 deletions tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,6 @@ parameters:
type: number
default: 0

resources:
repositories:
- repository: LLaMa2Onnx
type: Github
endpoint: Microsoft
name: Microsoft/Llama-2-Onnx
ref: main

variables:
- template: templates/common-variables.yml
- name: docker_base_image
Expand Down Expand Up @@ -287,11 +279,12 @@ stages:
workingDirectory: $(Build.SourcesDirectory)
condition: ne(variables.hitAnother, 'True')
- stage: Llama2_ONNX_FP16
- stage: Llama2_7B_ONNX
dependsOn:
- Build_Onnxruntime_Cuda
jobs:
- job: Llama2_ONNX_FP16
- job: Llama2_7B_ONNX
timeoutInMinutes: 120
variables:
skipComponentGovernanceDetection: true
workspace:
Expand Down Expand Up @@ -319,15 +312,15 @@ stages:

- template: templates/get-docker-image-steps.yml
parameters:
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda_tensorrt10_0
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda_tensorrt10_0_torch
Context: tools/ci_build/github/linux/docker/
ScriptName: tools/ci_build/get_docker_image.py
DockerBuildArgs: "
--build-arg BUILD_UID=$( id -u )
--build-arg BASEIMAGE=${{ variables.docker_base_image }}
--build-arg TRT_VERSION=${{ variables.linux_trt_version }}
"
Repository: onnxruntimeubi8packagestest
Repository: onnxruntimeubi8packagestest_torch
UpdateDepsTxt: false

- task: DownloadPackage@1
Expand All @@ -343,7 +336,7 @@ stages:
docker run --rm --gpus all -v $(Build.SourcesDirectory):/workspace \
-v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
-v $(Agent.TempDirectory)/meta_llama2_7b_hf:/meta-llama2 \
onnxruntimeubi8packagestest \
onnxruntimeubi8packagestest_torch \
bash -c "
set -ex; \
pushd /workspace/onnxruntime/python/tools/transformers/ ; \
Expand All @@ -352,14 +345,56 @@ stages:
python3 -m pip install -r requirements.txt ; \
popd ; \
python3 -m pip install /ort-artifact/*.whl ; \
python3 -m pip uninstall -y torch ; \
python3 -m pip install torch --index-url https://download.pytorch.org/whl/cu118 ; \
python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-fp16 --precision fp16 --execution_provider cuda --input /meta-llama2 --small_gpu ;\
python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --input /meta-llama2 --output llama2-7b-fp16 --precision fp16 --execution_provider cuda --small_gpu;\
ls -l llama2-7b-fp16; \
du -sh llama2-7b-fp16; \
popd ; \
"
displayName: 'Run Llama2 to Onnx F16 and parity Test'
workingDirectory: $(Build.SourcesDirectory)
# FP32 step: inside the onnxruntimeubi8packagestest_torch container, install the
# llama requirements and the ONNX Runtime wheel from /ort-artifact, then run
# models.llama.convert_to_onnx with --precision fp32 on the CUDA execution provider
# and list the size of the llama2-7b-fp32-gpu output directory.
- script: |
docker run --rm --gpus all -v $(Build.SourcesDirectory):/workspace \
-v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
-v $(Agent.TempDirectory)/meta_llama2_7b_hf:/meta-llama2 \
onnxruntimeubi8packagestest_torch \
bash -c "
set -ex; \
pushd /workspace/onnxruntime/python/tools/transformers/ ; \
python3 -m pip install --upgrade pip ; \
pushd models/llama ; \
python3 -m pip install -r requirements.txt ; \
popd ; \
python3 -m pip install /ort-artifact/*.whl ; \
python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --input /meta-llama2 --output llama2-7b-fp32-gpu --precision fp32 --execution_provider cuda;\
ls -l llama2-7b-fp32-gpu; \
du -sh llama2-7b-fp32-gpu; \
popd ; \
"
displayName: 'Run Llama2 to Onnx fp32 and parity Test'
workingDirectory: $(Build.SourcesDirectory)
# INT4 step: inside the onnxruntimeubi8packagestest_torch container, install the
# llama requirements and the ONNX Runtime wheel from /ort-artifact, then run
# models.llama.convert_to_onnx with --precision int4 and --use_gqa on the CUDA
# execution provider and list the size of the llama2-7b-int4-gpu output directory.
- script: |
docker run --rm --gpus all -v $(Build.SourcesDirectory):/workspace \
-v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
-v $(Agent.TempDirectory)/meta_llama2_7b_hf:/meta-llama2 \
onnxruntimeubi8packagestest_torch \
bash -c "
set -ex; \
pushd /workspace/onnxruntime/python/tools/transformers/ ; \
python3 -m pip install --upgrade pip ; \
pushd models/llama ; \
python3 -m pip install -r requirements.txt ; \
popd ; \
python3 -m pip install /ort-artifact/*.whl ; \
python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --input /meta-llama2 --output llama2-7b-int4-gpu --precision int4 --execution_provider cuda --use_gqa;\
ls -l llama2-7b-int4-gpu; \
du -sh llama2-7b-int4-gpu; \
popd ; \
"
displayName: 'Run Llama2 to Onnx INT4 and parity Test'
workingDirectory: $(Build.SourcesDirectory)
- stage: Whisper_ONNX
dependsOn:
- Build_Onnxruntime_Cuda
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# --------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------
# Dockerfile to Test ONNX Runtime on UBI8 with TensorRT 10.0 and CUDA 11.8 by default

# Build base image with required system packages.
# BASEIMAGE and TRT_VERSION are declared before FROM so both can be overridden
# with --build-arg. TRT_VERSION is re-declared (without a value) after FROM so
# that its default is visible inside the base stage.
ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8
ARG TRT_VERSION=10.0.1.6-1.cuda11.8
FROM $BASEIMAGE AS base
ARG TRT_VERSION
# Prepend the /opt/python/cp38-cp38 interpreter and the NVIDIA/CUDA/TensorRT tool
# directories to the inherited PATH. Uses the key=value form; the
# whitespace-separated "ENV key value" form is a legacy syntax.
ENV PATH=/opt/python/cp38-cp38/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${PATH}

# Install the basic shell tooling and drop dnf's database cache in the same layer.
RUN dnf install -y bash wget && \
    dnf clean dbcache

RUN pip3 install --upgrade pip
# The requirement must be quoted: RUN executes through /bin/sh, which would
# otherwise treat ">=68.2.2" as an output redirection, installing an
# unconstrained setuptools and writing pip's output to a file named "=68.2.2".
RUN pip3 install "setuptools>=68.2.2"

# Install TensorRT only if TRT_VERSION is not empty.
# Every package is pinned to ${TRT_VERSION}; passing --build-arg TRT_VERSION=""
# skips the installation entirely. Each package is listed exactly once.
RUN if [ -n "$TRT_VERSION" ]; then \
        echo "TRT_VERSION is $TRT_VERSION" && \
        dnf -y install \
            libnvinfer10-${TRT_VERSION} \
            libnvinfer-headers-devel-${TRT_VERSION} \
            libnvinfer-devel-${TRT_VERSION} \
            libnvinfer-lean10-${TRT_VERSION} \
            libnvonnxparsers10-${TRT_VERSION} \
            libnvonnxparsers-devel-${TRT_VERSION} \
            libnvinfer-dispatch10-${TRT_VERSION} \
            libnvinfer-plugin10-${TRT_VERSION} \
            libnvinfer-vc-plugin10-${TRT_VERSION} \
            libnvinfer-bin-${TRT_VERSION} \
            libnvinfer-plugin-devel-${TRT_VERSION} \
            libnvinfer-vc-plugin-devel-${TRT_VERSION} \
            libnvinfer-lean-devel-${TRT_VERSION} \
            libnvinfer-dispatch-devel-${TRT_VERSION} \
            libnvinfer-headers-plugin-devel-${TRT_VERSION} && \
        dnf clean dbcache ; \
    else \
        echo "TRT_VERSION is none skipping Tensor RT Installation" ; \
    fi

# Copy the helper scripts from the build context, run the .NET and Java
# installers, and remove the scripts again in the same layer.
# COPY is used instead of ADD because only a plain local directory copy is
# needed (no archive extraction or remote URL).
COPY scripts /tmp/scripts
RUN cd /tmp/scripts && /tmp/scripts/install_dotnet.sh && /tmp/scripts/install_java.sh && rm -rf /tmp/scripts

# Replace any torch already present with the build published on the
# PyTorch cu118 wheel index. Done in one RUN so the swap is a single layer.
RUN python3 -m pip uninstall -y torch && \
    python3 -m pip install torch --index-url https://download.pytorch.org/whl/cu118

# Build final image from base.
# Creates a non-root user whose UID comes from --build-arg BUILD_UID (default
# 1000) and makes that user's home directory the working directory.
# The "AS" keyword is upper-cased to match the casing of FROM, as in the base stage.
FROM base AS final
ARG BUILD_USER=onnxruntimedev
ARG BUILD_UID=1000
RUN adduser --uid $BUILD_UID $BUILD_USER
WORKDIR /home/$BUILD_USER
USER $BUILD_USER

0 comments on commit 587e92c

Please sign in to comment.