From 38e8b65afafe5aa36e1f258268efa9c7a4405bb5 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 29 Jan 2024 14:16:50 +0800 Subject: [PATCH] save stablediffusion and open-clip in pipeline cache --- .../models/stable_diffusion/demo_utils.py | 6 +++- .../models/stable_diffusion/engine_builder.py | 6 ++-- .../stable_diffusion/test/check_image.py | 15 ++++---- .../azure-pipelines/bigmodels-ci-pipeline.yml | 35 ++++++++++++++++--- 4 files changed, 48 insertions(+), 14 deletions(-) diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_utils.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_utils.py index 32c673416fce2..7bbaf195fc0fe 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_utils.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/demo_utils.py @@ -242,6 +242,8 @@ def parse_arguments(is_xl: bool, parser): parser.add_argument("--deterministic", action="store_true", help="use deterministic algorithms.") parser.add_argument("-dc", "--disable-cuda-graph", action="store_true", help="Disable cuda graph.") + parser.add_argument("--framework-model-dir", default=None, help="framework model directory") + group = parser.add_argument_group("Options for ORT_CUDA engine only") group.add_argument("--enable-vae-slicing", action="store_true", help="True will feed only one image to VAE once.") @@ -406,6 +408,7 @@ def initialize_pipeline( lora_scale=1.0, use_fp16_vae=True, use_vae=True, + framework_model_dir=None, ): pipeline_info = PipelineInfo( version, @@ -425,7 +428,7 @@ def initialize_pipeline( input_engine_dir = engine_dir onnx_dir, engine_dir, output_dir, framework_model_dir, timing_cache = get_engine_paths( - work_dir=work_dir, pipeline_info=pipeline_info, engine_type=engine_type + work_dir=work_dir, pipeline_info=pipeline_info, engine_type=engine_type, framework_model_dir=framework_model_dir ) pipeline = StableDiffusionPipeline( @@ -558,6 +561,7 @@ def load_pipelines(args, batch_size=None): "lora_scale": args.lora_scale, "use_fp16_vae": "xl" in args.version, "use_vae": True, + "framework_model_dir": args.framework_model_dir } if "xl" in args.version: diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py index 46a83f5dc228d..f9af78fc4d288 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py @@ -5,6 +5,7 @@ import hashlib import os from enum import Enum +from typing import Optional import torch from diffusion_models import CLIP, VAE, CLIPWithProj, PipelineInfo, UNet, UNetXL @@ -273,7 +274,7 @@ def vae_decode(self, latents): return self._vae_decode(latents) -def get_engine_paths(work_dir: str, pipeline_info: PipelineInfo, engine_type: EngineType): +def get_engine_paths(work_dir: str, pipeline_info: PipelineInfo, engine_type: EngineType, framework_model_dir: Optional[str] = None): root_dir = work_dir or "." short_name = pipeline_info.short_name() @@ -287,6 +288,7 @@ def get_engine_paths(work_dir: str, pipeline_info: PipelineInfo, engine_type: En # Shared among ORT_CUDA, ORT_TRT and TRT engines, and need use load_model(..., always_download_fp16=True) # So that the shared model is always fp16. - framework_model_dir = os.path.join(root_dir, "torch_model") + if framework_model_dir is None: + framework_model_dir = os.path.join(root_dir, "torch_model") return onnx_dir, engine_dir, output_dir, framework_model_dir, timing_cache diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py index fcfe8b081fb0a..5c9e9edefbb7c 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py @@ -1,5 +1,6 @@ import argparse import os +from typing import Optional import cv2 import open_clip @@ -12,13 +13,14 @@ def arg_parser(): parser = argparse.ArgumentParser(description="Options for Compare 2 image") parser.add_argument("--image1", type=str, help="Path to image 1") parser.add_argument("--image2", type=str, help="Path to image 2") + parser.add_argument("--cache_dir", type=str, help="Path to model cache directory") args = parser.parse_args() return args -def image_encoder(img: Image.Image): # -> torch.Tensor: +def image_encoder(img: Image.Image, cache_dir: Optional[str] = None): # -> torch.Tensor: device = "cuda" if torch.cuda.is_available() else "cpu" - model, _, preprocess = open_clip.create_model_and_transforms("ViT-B-16-plus-240", pretrained="laion400m_e32") + model, _, preprocess = open_clip.create_model_and_transforms("ViT-B-16-plus-240", pretrained="laion400m_e32", cache_dir=cache_dir) model.to(device) img1 = Image.fromarray(img).convert("RGB") @@ -41,11 +43,11 @@ def load_image(image_path: str): # -> Image.Image: return img -def generate_score(image1: str, image2: str): # -> float: +def generate_score(image1: str, image2: str, cache_dir: Optional[str] = None): # -> float: test_img = load_image(image1) data_img = load_image(image2) - img1 = image_encoder(test_img) - img2 = image_encoder(data_img) + img1 = image_encoder(test_img, cache_dir) + img2 = image_encoder(data_img, cache_dir) cos_scores = util.pytorch_cos_sim(img1, img2) score = round(float(cos_scores[0][0]) * 100, 2) return score @@ -55,7 +57,8 @@ def main(): args = arg_parser() image1 = args.image1 image2 = args.image2 - score = round(generate_score(image1, image2), 2) + cache_dir = args.cache_dir + score = round(generate_score(image1, image2, cache_dir), 2) print("similarity Score: ", {score}) if score < 99: print(f"{image1} and {image2} are different") diff --git a/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml index dd88a4d6d5632..11b3d61e6fa4c 100644 --- a/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml @@ -172,6 +172,8 @@ stages: - job: Stable_Diffusion variables: skipComponentGovernanceDetection: true + CLIP_MODEL_CACHE: $(Agent.TempDirectory)/clip_cache + STABLE_DIFFUSION_MODEL_CACHE: $(Agent.TempDirectory)/stablediffusion_cache workspace: clean: all pool: onnxruntime-Linux-GPU-A10-12G @@ -188,9 +190,21 @@ stages: SpecificArtifact: ${{ parameters.specificArtifact }} BuildId: ${{ parameters.BuildId }} + - task: Cache@2 + inputs: + key: stable_diffusion | $(Build.SourcesDirectory)/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py + restoreKeys: | + stable_diffusion | $(Build.SourcesDirectory)/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py + stable_diffusion + path: $(STABLE_DIFFUSION_MODEL_CACHE) + displayName: Cache stable diffusion model + - script: | - docker run --rm --gpus all -v $PWD:/workspace -v $(Build.BinariesDirectory)/Release:/Release nvcr.io/nvidia/pytorch:22.11-py3 \ - bash -c ' + docker run --rm --gpus all -v $PWD:/workspace \ + -v $(Build.BinariesDirectory)/Release:/Release \ + -v $(STABLE_DIFFUSION_MODEL_CACHE):/model_cache:rw \ + nvcr.io/nvidia/pytorch:22.11-py3 \ + bash -c ' \ set -ex; \ python3 --version; \ python3 -m pip install --upgrade pip; \ @@ -199,15 +213,26 @@ stages: python3 -m pip install -r requirements-cuda11.txt; \ python3 -m pip install --upgrade polygraphy onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com; \ echo Generate an image guided by a text prompt; \ - python3 demo_txt2img.py --seed 1 --deterministic "astronaut riding a horse on mars" ; \ + python3 demo_txt2img.py --framework-model-dir /model_cache --seed 1 --deterministic "astronaut riding a horse on mars" ; \ find $(pwd) -name "*.png" ; \ popd ; \ ' displayName: 'Run stable diffusion demo' workingDirectory: $(Build.SourcesDirectory) + - task: Cache@2 + inputs: + key: '"clip_model" | "2.24.0"' + restoreKeys: | + "clip_model" | "2.24.0" + "clip_model" + path: $(CLIP_MODEL_CACHE) + displayName: Cache clip model + - script: | - docker run --rm --gpus all -v $PWD:/workspace nvcr.io/nvidia/pytorch:22.11-py3 \ + docker run --rm --gpus all -v $PWD:/workspace \ + -v $(CLIP_MODEL_CACHE):/model_cache:rw \ + nvcr.io/nvidia/pytorch:22.11-py3 \ bash -c ' set -ex; \ python3 --version; \ @@ -217,7 +242,7 @@ stages: pushd test; \ python3 -m pip install -r requirements.txt; \ echo check demo_txt2image.py generate image; \ - python3 -u check_image.py --image1 astronaut_riding_txt2image-DDIM-50.png --image2 $image2; \ + python3 -u check_image.py --image1 astronaut_riding_txt2image-DDIM-50.png --image2 $image2 --cache_dir /model_cache ; \ popd ; \ popd ; \ '