Commit bbfe6a0

Merge …into user/pavignol/add-dml-fp16-cast
PatriceVignola committed Apr 22, 2024
2 parents 02cff23 + cf18f30
Showing 8 changed files with 76 additions and 128 deletions.
75 changes: 55 additions & 20 deletions .github/workflows/linux-gpu-x64-build.yml
@@ -1,4 +1,4 @@
-name: "Linux GPU x64 Build"
+name: "Linux CUDA x64 Build"
 on: [ workflow_dispatch, pull_request ]

 concurrency:
@@ -11,13 +11,25 @@ env:
   ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-linux-x64-gpu-1.17.3.tgz"

 jobs:
-  linux-gpu-x64-build:
+  linux-cuda-x64-build:
+    env :
+      PYTHON_EXECUTABLE: "/opt/python/cp38-cp38/bin/python3.8"
     runs-on: [ "self-hosted", "1ES.Pool=onnxruntime-genai-Ubuntu2004-T4" ]
     steps:
       - name: Checkout OnnxRuntime GenAI repo
         uses: actions/checkout@v4
         with:
           submodules: true
+      # We are using the same manylinux repo as the one used in the packaging build
+      - name: Checkout ManyLinux repo
+        uses: actions/checkout@v4
+        with:
+          repository: pypa/manylinux
+          ref: 5eda9aded5462201e6310105728d33016e637ea7
+          clean: true
+          path: manylinux
+          submodules: true

       - name: Download OnnxRuntime
         run: |
           curl -L -o ${{ env.ort_zip }} ${{ env.ort_url }}
@@ -31,45 +43,68 @@ jobs:
         run: |
           mv ${{ env.ort_dir }} ort
-      - name: Download Docker Image
+      - name: Get Docker Image
         run: |
           set -e -x
           az login --identity --username 63b63039-6328-442f-954b-5a64d124e5b4
           az acr login --name onnxruntimebuildcache --subscription 00c06639-6ee4-454e-8058-8d8b1703bd87
-          python3 tools/ci_build/get_docker_image.py --dockerfile tools/ci_build/github/linux/docker/inference/x64/default/gpu/Dockerfile \
-            --context tools/ci_build/github/linux/docker/inference/x64/default/gpu \
+          python3 tools/ci_build/get_docker_image.py --dockerfile tools/ci_build/github/linux/docker/manylinux/Dockerfile.manylinux2_28_cuda \
+            --context tools/ci_build/github/linux/docker/manylinux \
             --docker-build-args "--build-arg BUILD_UID=$( id -u )" \
             --container-registry onnxruntimebuildcache \
-            --repository ort_genai_linux_gpu_gha
+            --manylinux-src manylinux \
+            --multiple_repos \
+            --repository onnxruntimecudabuildx64
-      - name: Print Docker Image Environment Variables
+      - name: Config with Cmake in Docker
         run: |
-          echo "Printing docker image environment variables"
-          docker run --rm ort_genai_linux_gpu_gha env
+          set -e -x
+          docker run \
+            --gpus all \
+            --rm \
+            --volume $GITHUB_WORKSPACE:/ort_genai_src \
+            -w /ort_genai_src onnxruntimecudabuildx64 \
+            bash -c " \
+              /usr/bin/cmake --preset linux_gcc_cuda_release \
+                -DMANYLINUX=ON \
+                -DPYTHON_EXECUTABLE=${{ env.PYTHON_EXECUTABLE }} "
       - name: Build with Cmake in Docker
         run: |
-          echo "Running docker image ort_genai_linux_gpu_gha"
           set -e -x
           docker run \
             --gpus all \
             --rm \
-            --volume $GITHUB_WORKSPACE:/onnxruntime_src \
-            -w /onnxruntime_src ort_genai_linux_gpu_gha bash -c "echo $PATH && /usr/bin/cmake -DCMAKE_CUDA_ARCHITECTURES=86 --preset linux_gcc_cuda_release && /usr/bin/cmake --build --preset linux_gcc_cuda_release"
-      - name: Install the onnxruntime-genai Python wheel and run Python tests
+            --volume $GITHUB_WORKSPACE:/ort_genai_src \
+            -w /ort_genai_src onnxruntimecudabuildx64 \
+            bash -c " \
+              /usr/bin/cmake --build --preset linux_gcc_cuda_release --parallel $( nproc )"
+      - name: Get HuggingFace Token
+        run: |
+          az login --identity --username 63b63039-6328-442f-954b-5a64d124e5b4
+          HF_TOKEN=$(az keyvault secret show --vault-name anubissvcsecret --name ANUBIS-HUGGINGFACE-TOKEN --query value)
+          echo "::add-mask::$HF_TOKEN"
+          echo "HF_TOKEN=$HF_TOKEN" >> $GITHUB_ENV
+      - name: Install the onnxruntime-genai Python wheel and run python test
         run: |
-          echo "Installing the onnxruntime-genai Python wheel and running the Python tests"
           docker run \
             --gpus all \
             --rm \
-            --volume $GITHUB_WORKSPACE:/onnxruntime_src \
-            -w /onnxruntime_src ort_genai_linux_gpu_gha bash -c "python3 -m pip install /onnxruntime_src/build/cuda/wheel/onnxruntime_genai*.whl --user && python3 -m pip install -r test/python/requirements.txt --user && python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models"
+            --volume $GITHUB_WORKSPACE:/ort_genai_src \
+            -e HF_TOKEN=$HF_TOKEN \
+            -w /ort_genai_src onnxruntimecudabuildx64 bash -c " \
+              ${{ env.PYTHON_EXECUTABLE }} -m pip install /ort_genai_src/build/cuda/wheel/onnxruntime_genai*manylinux*.whl --user && \
+              ${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/requirements-gpu.txt --user && \
+              ${{ env.PYTHON_EXECUTABLE }} test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models"
-      - name: Docker -- Run tests
+      - name: Docker -- Run unit tests
         run: |
-          echo "Running docker image ort_genai_linux_gpu_gha"
+          echo "Running docker image onnxruntimecudabuildx64"
           docker run \
             --gpus all \
             --rm \
-            --volume $GITHUB_WORKSPACE:/onnxruntime_src \
-            -w /onnxruntime_src ort_genai_linux_gpu_gha bash -c "/onnxruntime_src/build/cuda/test/unit_tests"
+            --volume $GITHUB_WORKSPACE:/ort_genai_src \
+            -w /ort_genai_src onnxruntimecudabuildx64 bash -c "/ort_genai_src/build/cuda/test/unit_tests"
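For anyone reproducing this CI change on their own machine, the new configure and build steps boil down to the commands below. This is a minimal sketch: it assumes an image tagged onnxruntimecudabuildx64 already exists locally (e.g. produced by the get_docker_image.py step above) and that the repo is checked out with submodules.

```bash
# Minimal local repro of the "Config" + "Build" steps above.
# Assumes the onnxruntimecudabuildx64 image from the "Get Docker Image" step
# is available locally; $(nproc) expands on the host before docker runs.
docker run --gpus all --rm \
  --volume "$PWD":/ort_genai_src \
  -w /ort_genai_src onnxruntimecudabuildx64 \
  bash -c "/usr/bin/cmake --preset linux_gcc_cuda_release -DMANYLINUX=ON && \
           /usr/bin/cmake --build --preset linux_gcc_cuda_release --parallel $(nproc)"
```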
23 changes: 9 additions & 14 deletions examples/python/README.md
@@ -1,17 +1,12 @@
-# Gen-AI Python Examples
+# Generate() API Python Examples

 ## Install the onnxruntime-genai library

 Install the python package according to the [installation instructions](https://onnxruntime.ai/docs/genai/howto/install).

-```bash
-cd build/wheel
-pip install onnxruntime_genai-*.whl
-```
-
 ## Get the model

-You can generate the model using the model builder this library, or bring your own model.
+You can generate the model using the model builder with this library, or bring your own model.

 If you bring your own model, you need to provide the configuration. See the [config reference](https://onnxruntime.ai/docs/genai/reference/config).
@@ -20,11 +15,7 @@ To generate the model with model builder:
 1. Install the model builder script dependencies

 ```bash
-pip install numpy
-pip install transformers
-pip install torch
-pip install onnx
-pip install onnxruntime
+pip install numpy transformers torch onnx onnxruntime
 ```

 2. Choose a model. Examples of supported ones are:
@@ -42,10 +33,14 @@ To generate the model with model builder:

 ## Run the example model script

-See accompanying chat-e2e-example.sh and generate-e2e-example.sh scripts for end-to-end examples of workflow.
+See accompanying qa-e2e-example.sh and generate-e2e-example.sh scripts for end-to-end examples of workflow.

+The `model-generate` script generates the output sequence all on one function call.
+
+The `model-qa` script streams the output text token by token.
+
 To run the python examples...
 ```bash
 python model-generate.py -m {path to model folder} -ep {cpu or cuda} -i {string prompt}
-python model-chat.py -m {path to model folder} -ep {cpu or cuda}
+python model-qa.py -m {path to model folder} -ep {cpu or cuda}
 ```
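As a concrete instance of the placeholder commands above, a run against the int4 CPU phi-2 model produced by the model builder might look like this (the model path is taken from the e2e scripts below; the prompt is illustrative):

```bash
# Example invocation, assuming the model was built to genai_models/phi2-int4-cpu
python model-generate.py -m genai_models/phi2-int4-cpu -ep cpu -i "my favorite movie is"
python model-qa.py -m genai_models/phi2-int4-cpu -ep cpu
```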
10 changes: 3 additions & 7 deletions examples/python/generate-e2e-example.sh
@@ -1,8 +1,4 @@
-# Description: Example of generate end-to-end usage, including model building and running.
-pip install numpy
-pip install transformers
-pip install torch
-pip install onnx
-pip install onnxruntime-gpu
+# Description: Example of generate end-to-end usage, including model building and running
+pip install numpy transformers torch onnx onnxruntime
 python3 -m onnxruntime_genai.models.builder -m microsoft/phi-2 -o genai_models/phi2-int4-cpu -p int4 -e cpu -c hf_cache
-python3 model-generate.py -m genai_models/phi2-int4-cpu -pr "my favorite movie is" "write a function that always returns True" "I am very happy" -ep cpu -p 0.0 -k 1 -v
+python3 model-generate.py -m genai_models/phi2-int4-cpu -pr "my favorite movie is" "write a function that always returns True" "I am very happy" -p 0.0 -k 1 -v
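Note that `-p 0.0 -k 1` in these scripts pins decoding to greedy search (top-p 0, top-k 1), keeping the example deterministic. A sampling variant might look like the following sketch, assuming `-p`/`-k` map to top-p/top-k as in the script above:

```bash
# Hypothetical sampling run; higher top-p/top-k trade determinism for variety
python3 model-generate.py -m genai_models/phi2-int4-cpu -pr "my favorite movie is" -p 0.9 -k 40 -v
```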
2 changes: 1 addition & 1 deletion examples/python/qa-e2e-example.sh
@@ -1,3 +1,3 @@
 # Description: Example of chatbot end-to-end usage, including model building and running.
 python3 -m onnxruntime_genai.models.builder -m microsoft/phi-2 -o genai_models/phi2-int4-cpu -p int4 -e cpu -c hf_cache
-python3 model-chat.py -m genai_models/phi2-int4-cpu -ep cpu -p 0.0 -k 1
+python3 model-qa.py -m genai_models/phi2-int4-cpu -ep cpu -p 0.0 -k 1
8 changes: 8 additions & 0 deletions test/python/requirements-gpu.txt
@@ -0,0 +1,8 @@
+-f https://download.pytorch.org/whl/torch_stable.html
+torch==2.2.1+cu118
+numpy
+pytest
+onnx
+onnxruntime_gpu
+transformers
+huggingface_hub[cli]
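This new file backs the wheel-test step in the workflow above; the `-f` line points pip at the PyTorch wheel index so the CUDA 11.8 torch build (`+cu118`) can be resolved. Installing it locally would look like:

```bash
# Installs the GPU test dependencies, including the CUDA 11.8 torch wheel
pip install -r test/python/requirements-gpu.txt
```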

3 changed files were deleted (contents not shown).