From e74f141338547d7eea9c6cbe23d1c892174163cf Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Wed, 31 Jan 2024 09:39:27 +0800 Subject: [PATCH] Save stablediffusion and open-clip in pipeline cache (#19314) ### Description 1. save the model to pipeline cache 2. lower the similarity bar to 97 3. publish the generated image so that we can check it when the test fails ### Motivation and Context Reduce model downloads --- .../models/stable_diffusion/demo_utils.py | 6 ++- .../models/stable_diffusion/engine_builder.py | 8 +++- .../stable_diffusion/test/check_image.py | 19 +++++--- .../azure-pipelines/bigmodels-ci-pipeline.yml | 47 ++++++++++++++++--- 4 files changed, 64 insertions(+), 16 deletions(-) diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_utils.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_utils.py index 32c673416fce2..369f31511faca 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_utils.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_utils.py @@ -242,6 +242,8 @@ def parse_arguments(is_xl: bool, parser): parser.add_argument("--deterministic", action="store_true", help="use deterministic algorithms.") parser.add_argument("-dc", "--disable-cuda-graph", action="store_true", help="Disable cuda graph.") + parser.add_argument("--framework-model-dir", default=None, help="framework model directory") + group = parser.add_argument_group("Options for ORT_CUDA engine only") group.add_argument("--enable-vae-slicing", action="store_true", help="True will feed only one image to VAE once.") @@ -406,6 +408,7 @@ def initialize_pipeline( lora_scale=1.0, use_fp16_vae=True, use_vae=True, + framework_model_dir=None, ): pipeline_info = PipelineInfo( version, @@ -425,7 +428,7 @@ def initialize_pipeline( input_engine_dir = engine_dir onnx_dir, engine_dir, output_dir, framework_model_dir, timing_cache = get_engine_paths( - work_dir=work_dir, pipeline_info=pipeline_info, 
engine_type=engine_type + work_dir=work_dir, pipeline_info=pipeline_info, engine_type=engine_type, framework_model_dir=framework_model_dir ) pipeline = StableDiffusionPipeline( @@ -558,6 +561,7 @@ def load_pipelines(args, batch_size=None): "lora_scale": args.lora_scale, "use_fp16_vae": "xl" in args.version, "use_vae": True, + "framework_model_dir": args.framework_model_dir, } if "xl" in args.version: diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py index 46a83f5dc228d..c03c6f0b21cd3 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py @@ -5,6 +5,7 @@ import hashlib import os from enum import Enum +from typing import Optional import torch from diffusion_models import CLIP, VAE, CLIPWithProj, PipelineInfo, UNet, UNetXL @@ -273,7 +274,9 @@ def vae_decode(self, latents): return self._vae_decode(latents) -def get_engine_paths(work_dir: str, pipeline_info: PipelineInfo, engine_type: EngineType): +def get_engine_paths( + work_dir: str, pipeline_info: PipelineInfo, engine_type: EngineType, framework_model_dir: Optional[str] = None +): root_dir = work_dir or "." short_name = pipeline_info.short_name() @@ -287,6 +290,7 @@ def get_engine_paths(work_dir: str, pipeline_info: PipelineInfo, engine_type: En # Shared among ORT_CUDA, ORT_TRT and TRT engines, and need use load_model(..., always_download_fp16=True) # So that the shared model is always fp16. 
- framework_model_dir = os.path.join(root_dir, "torch_model") + if framework_model_dir is None: + framework_model_dir = os.path.join(root_dir, "torch_model") return onnx_dir, engine_dir, output_dir, framework_model_dir, timing_cache diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py index fcfe8b081fb0a..da7f47b144b9b 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py @@ -1,5 +1,6 @@ import argparse import os +from typing import Optional import cv2 import open_clip @@ -12,13 +13,16 @@ def arg_parser(): parser = argparse.ArgumentParser(description="Options for Compare 2 image") parser.add_argument("--image1", type=str, help="Path to image 1") parser.add_argument("--image2", type=str, help="Path to image 2") + parser.add_argument("--cache_dir", type=str, help="Path to model cache directory") args = parser.parse_args() return args -def image_encoder(img: Image.Image): # -> torch.Tensor: +def image_encoder(img: Image.Image, cache_dir: Optional[str] = None): # -> torch.Tensor: device = "cuda" if torch.cuda.is_available() else "cpu" - model, _, preprocess = open_clip.create_model_and_transforms("ViT-B-16-plus-240", pretrained="laion400m_e32") + model, _, preprocess = open_clip.create_model_and_transforms( + "ViT-B-16-plus-240", pretrained="laion400m_e32", cache_dir=cache_dir + ) model.to(device) img1 = Image.fromarray(img).convert("RGB") @@ -41,11 +45,11 @@ def load_image(image_path: str): # -> Image.Image: return img -def generate_score(image1: str, image2: str): # -> float: +def generate_score(image1: str, image2: str, cache_dir: Optional[str] = None): # -> float: test_img = load_image(image1) data_img = load_image(image2) - img1 = image_encoder(test_img) - img2 = image_encoder(data_img) + img1 = 
image_encoder(test_img, cache_dir) + img2 = image_encoder(data_img, cache_dir) cos_scores = util.pytorch_cos_sim(img1, img2) score = round(float(cos_scores[0][0]) * 100, 2) return score @@ -55,9 +59,10 @@ def main(): args = arg_parser() image1 = args.image1 image2 = args.image2 - score = round(generate_score(image1, image2), 2) + cache_dir = args.cache_dir + score = round(generate_score(image1, image2, cache_dir), 2) print("similarity Score: ", {score}) - if score < 99: + if score < 97: print(f"{image1} and {image2} are different") raise SystemExit(1) else: diff --git a/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml index dd88a4d6d5632..0de2ac44215c4 100644 --- a/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml @@ -172,6 +172,9 @@ stages: - job: Stable_Diffusion variables: skipComponentGovernanceDetection: true + CLIP_MODEL_CACHE: $(Agent.TempDirectory)/clip_cache + STABLE_DIFFUSION_MODEL_CACHE: $(Agent.TempDirectory)/stablediffusion_cache + GenerateImage_DIR: $(Agent.TempDirectory)/images workspace: clean: all pool: onnxruntime-Linux-GPU-A10-12G @@ -188,9 +191,23 @@ stages: SpecificArtifact: ${{ parameters.specificArtifact }} BuildId: ${{ parameters.BuildId }} + - task: Cache@2 + inputs: + key: stable_diffusion | $(Build.SourcesDirectory)/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py + restoreKeys: | + stable_diffusion | $(Build.SourcesDirectory)/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py + stable_diffusion + path: $(STABLE_DIFFUSION_MODEL_CACHE) + displayName: Cache stable diffusion model + - script: | - docker run --rm --gpus all -v $PWD:/workspace -v $(Build.BinariesDirectory)/Release:/Release nvcr.io/nvidia/pytorch:22.11-py3 \ - bash -c ' + mkdir -p $(GenerateImage_DIR) + docker run --rm --gpus all -v 
$PWD:/workspace \ + -v $(Build.BinariesDirectory)/Release:/Release \ + -v $(STABLE_DIFFUSION_MODEL_CACHE):/model_cache:rw \ + -v $(GenerateImage_DIR):/images:rw \ + nvcr.io/nvidia/pytorch:22.11-py3 \ + bash -c ' \ set -ex; \ python3 --version; \ python3 -m pip install --upgrade pip; \ @@ -199,15 +216,33 @@ stages: python3 -m pip install -r requirements-cuda11.txt; \ python3 -m pip install --upgrade polygraphy onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com; \ echo Generate an image guided by a text prompt; \ - python3 demo_txt2img.py --seed 1 --deterministic "astronaut riding a horse on mars" ; \ - find $(pwd) -name "*.png" ; \ + python3 demo_txt2img.py --framework-model-dir /model_cache --seed 1 --deterministic "astronaut riding a horse on mars" ; \ + find $(pwd)/ORT_CUDA -name "*.png" -exec cp {} /images/ \; ; \ popd ; \ ' displayName: 'Run stable diffusion demo' workingDirectory: $(Build.SourcesDirectory) + # For verification we will check the generated image looks . + - task: PublishPipelineArtifact@0 + displayName: 'Publish code coverage report' + inputs: + artifactName: "Generated-Image" + targetPath: '$(GenerateImage_DIR)' + + - task: Cache@2 + inputs: + key: clip_model | $(Build.SourcesDirectory)/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py + restoreKeys: | + clip_model | $(Build.SourcesDirectory)/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py + clip_model + path: $(CLIP_MODEL_CACHE) + displayName: Cache clip model + - script: | - docker run --rm --gpus all -v $PWD:/workspace nvcr.io/nvidia/pytorch:22.11-py3 \ + docker run --rm --gpus all -v $PWD:/workspace \ + -v $(CLIP_MODEL_CACHE):/model_cache:rw \ + nvcr.io/nvidia/pytorch:22.11-py3 \ bash -c ' set -ex; \ python3 --version; \ @@ -217,7 +252,7 @@ stages: pushd test; \ python3 -m pip install -r requirements.txt; \ echo check demo_txt2image.py generate image; \ - python3 -u check_image.py --image1 
astronaut_riding_txt2image-DDIM-50.png --image2 $image2; \ + python3 -u check_image.py --image1 astronaut_riding_txt2image-DDIM-50.png --image2 $image2 --cache_dir /model_cache ; \ popd ; \ popd ; \ '