From ad93b7f619451f3b687b0f66df4582d83c93bc38 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 19 Oct 2023 16:46:00 +0200 Subject: [PATCH 01/23] Enable export latent consistency model --- optimum/exporters/onnx/model_configs.py | 2 ++ optimum/exporters/tasks.py | 7 +++++-- optimum/onnxruntime/modeling_diffusion.py | 15 +++++++++++++++ optimum/utils/input_generators.py | 14 +++++++++++--- 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index e1461c2a0c..5b1268f3c6 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -928,6 +928,8 @@ def inputs(self) -> Dict[str, Dict[int, str]]: common_inputs["text_embeds"] = {0: "batch_size"} common_inputs["time_ids"] = {0: "batch_size"} + if getattr(self._normalized_config, "time_cond_proj_dim", None) is not None: + common_inputs["timestep_cond"] = {0: "batch_size"} return common_inputs @property diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 2a0f9076ce..7ff1649546 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -187,6 +187,7 @@ class TasksManager: _DIFFUSERS_TASKS_TO_MODEL_LOADERS = { "stable-diffusion": "StableDiffusionPipeline", "stable-diffusion-xl": "StableDiffusionXLImg2ImgPipeline", + "stable-diffusion-latent-consistency": "LatentConsistencyModelPipeline", } _TIMM_TASKS_TO_MODEL_LOADERS = { @@ -1361,12 +1362,14 @@ def _infer_task_from_model_or_model_class( pt_auto_module = importlib.import_module("transformers.models.auto.modeling_auto") tf_auto_module = importlib.import_module("transformers.models.auto.modeling_tf_auto") + is_diffusers = model_class.config_name == "model_index.json" + for auto_cls_name, task in itertools.chain.from_iterable(iterable): if any( ( target_name.startswith("Auto"), target_name.startswith("TFAuto"), - "StableDiffusion" in target_name, + is_diffusers, ) ): if target_name == auto_cls_name: @@ -1409,7 +1412,7 @@ def _infer_task_from_model_name_or_path( model_info = huggingface_hub.model_info(model_name_or_path, revision=revision) if getattr(model_info, "library_name", None) == "diffusers": # TODO : getattr(model_info, "model_index") defining auto_model_class_name currently set to None - for task in ("stable-diffusion-xl", "stable-diffusion"): + for task in ("stable-diffusion-xl", "stable-diffusion", "stable-diffusion-latent-consistency"): if task in model_info.tags: inferred_task_name = task break diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index bc0d17a93a..3e9adcccde 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -29,7 +29,9 @@ PNDMScheduler, StableDiffusionPipeline, StableDiffusionXLImg2ImgPipeline, + LatentConsistencyModelPipeline, ) + from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from diffusers.utils import CONFIG_NAME, is_invisible_watermark_available from huggingface_hub import snapshot_download @@ -45,6 +47,7 @@ from ..pipelines.diffusers.pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_xl import StableDiffusionXLPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_xl_img2img import StableDiffusionXLImg2ImgPipelineMixin +from ..pipelines.diffusers.pipeline_stable_diffusion_latent_consistency import LatentConsistencyModelPipelinePipelineMixin from 
..pipelines.diffusers.pipeline_utils import VaeImageProcessor from ..utils import ( DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER, @@ -562,6 +565,18 @@ class ORTStableDiffusionInpaintPipeline(ORTStableDiffusionPipelineBase, StableDi __call__ = StableDiffusionInpaintPipelineMixin.__call__ + +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) +class ORTLatentConsistencyModelPipeline(ORTStableDiffusionPipelineBase, LatentConsistencyModelPipelinePipelineMixin): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). + """ + auto_model_class = LatentConsistencyModelPipeline + __call__ = LatentConsistencyModelPipelinePipelineMixin.__call__ + + + + class ORTStableDiffusionXLPipelineBase(ORTStableDiffusionPipelineBase): auto_model_class = StableDiffusionXLImg2ImgPipeline diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index 2073344395..700aa4fccb 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -684,6 +684,7 @@ class DummyTimestepInputGenerator(DummyInputGenerator): "timestep", "text_embeds", "time_ids", + "timestep_cond", ) def __init__( @@ -703,14 +704,21 @@ def __init__( self.batch_size = random.randint(low, high) else: self.batch_size = batch_size + self.time_cond_proj_dim = normalized_config.config.time_cond_proj_dim def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): - shape = [self.batch_size] - if input_name == "timestep": + shape = [self.batch_size] return self.random_int_tensor(shape, max_value=self.vocab_size, framework=framework, dtype=int_dtype) + + if input_name == "text_embeds": + dim = self.text_encoder_projection_dim + elif input_name == "timestep_cond": + dim = self.time_cond_proj_dim + else: + dim = self.time_ids - shape.append(self.text_encoder_projection_dim if input_name == "text_embeds" else self.time_ids) + shape = [self.batch_size, dim] return self.random_float_tensor(shape, max_value=self.vocab_size, framework=framework, dtype=float_dtype) From 057e5762b882c19febaab10cfd46865db276d58e Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 19 Oct 2023 16:46:24 +0200 Subject: [PATCH 02/23] add pipeline --- .../diffusers/pipeline_latent_consistency.py | 233 ++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 optimum/pipelines/diffusers/pipeline_latent_consistency.py diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py new file mode 100644 index 0000000000..b9287a178d --- /dev/null +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -0,0 +1,233 @@ +# Copyright 2023 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import inspect +import logging +from typing import Callable, List, Optional, Union + +import numpy as np +import torch +from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput + +from .pipeline_utils import DiffusionPipelineMixin, rescale_noise_cfg +from .pipeline_stable_diffusion import StableDiffusionPipelineMixin + + +logger = logging.getLogger(__name__) + + +class LatentConsistencyModelPipelinePipelineMixin(StableDiffusionPipelineMixin): + + # Adapted from https://github.com/huggingface/diffusers/blob/v0.22.0/src/diffusers/pipelines/latent_consistency/pipeline_latent_consistency.py#L264 + def __call__( + self, + prompt: Optional[Union[str, List[str]]] = None, + height: Optional[int] = 768, # TODO : default to None + width: Optional[int] = 768, + num_inference_steps: int = 4, + guidance_scale: float = 7.5, + num_images_per_prompt: int = 1, + generator: Optional[np.random.RandomState] = None, + latents: Optional[np.ndarray] = None, + prompt_embeds: Optional[np.ndarray] = None, + lcm_origin_steps: int = 50, + output_type: str = "pil", + return_dict: bool = True, + callback: Optional[Callable[[int, int, np.ndarray], None]] = None, + callback_steps: int = 1, + guidance_rescale: float = 0.0, + ): + r""" + Function invoked when calling the pipeline for generation. + + Args: + prompt (`Optional[Union[str, List[str]]]`, defaults to None): + The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. + instead. + height (`Optional[int]`, defaults to None): + The height in pixels of the generated image. + width (`Optional[int]`, defaults to None): + The width in pixels of the generated image. + num_inference_steps (`int`, defaults to 50): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. + guidance_scale (`float`, defaults to 7.5): + Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). + `guidance_scale` is defined as `w` of equation 2. of [Imagen + Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > + 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, + usually at the expense of lower image quality. + num_images_per_prompt (`int`, defaults to 1): + The number of images to generate per prompt. + eta (`float`, defaults to 0.0): + Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to + [`schedulers.DDIMScheduler`], will be ignored for others. + generator (`Optional[np.random.RandomState]`, defaults to `None`):: + A np.random.RandomState to make generation deterministic. + latents (`Optional[np.ndarray]`, defaults to `None`): + Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image + generation. Can be used to tweak the same generation with different prompts. If not provided, a latents + tensor will ge generated by sampling using the supplied random `generator`. + prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): + Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not + provided, text embeddings will be generated from `prompt` input argument. + output_type (`str`, defaults to `"pil"`): + The output format of the generate image. Choose between + [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. 
+ return_dict (`bool`, defaults to `True`): + Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a + plain tuple. + callback (Optional[Callable], defaults to `None`): + A function that will be called every `callback_steps` steps during inference. The function will be + called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. + callback_steps (`int`, defaults to 1): + The frequency at which the `callback` function will be called. If not specified, the callback will be + called at every step. + guidance_rescale (`float`, defaults to 0.0): + Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are + Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of + [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). + Guidance rescale factor should fix overexposure when using zero terminal SNR. + + Returns: + [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: + [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple. + When returning a tuple, the first element is a list with the generated images, and the second element is a + list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" + (nsfw) content, according to the `safety_checker`. + """ + height = height or self.unet.config.get("sample_size", 64) * self.vae_scale_factor + width = width or self.unet.config.get("sample_size", 64) * self.vae_scale_factor + + + # Don't need to get negative prompts due to LCM guided distillation + negative_prompt=None + negative_prompt_embeds=None + + # check inputs. 
Raise error if not correct + self.check_inputs( + prompt, height, width, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds + ) + + # define call parameters + if isinstance(prompt, str): + batch_size = 1 + elif isinstance(prompt, list): + batch_size = len(prompt) + else: + batch_size = prompt_embeds.shape[0] + + if generator is None: + generator = np.random + + prompt_embeds = self._encode_prompt( + prompt, + num_images_per_prompt, + do_classifier_free_guidance, + negative_prompt, + prompt_embeds=prompt_embeds, + negative_prompt_embeds=negative_prompt_embeds, + ) + + # set timesteps + self.scheduler.set_timesteps(num_inference_steps, lcm_origin_steps) + timesteps = self.scheduler.timesteps + + latents = self.prepare_latents( + batch_size * num_images_per_prompt, + self.unet.config.get("in_channels", 4), + height, + width, + prompt_embeds.dtype, + generator, + latents, + ) + + bs = batch_size * num_images_per_prompt + # get Guidance Scale Embedding + w = np.full(bs, guidance_scale) + w_embedding = self.get_guidance_scale_embedding(w, embedding_dim=256, dtype=prompt_embeds.dtype) + + # Adapted from diffusers to extend it for other runtimes than ORT + timestep_dtype = self.unet.input_dtype.get("timestep", np.float32) + + num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order + for i, t in enumerate(self.progress_bar(timesteps)): + + # predict the noise residual + timestep = np.full(bs, t, dtype=timestep_dtype) + noise_pred = self.unet(sample=latent_model_input, timestep=timestep, encoder_hidden_states=prompt_embeds, timestep_cond=w_embedding) + noise_pred = noise_pred[0] + + # compute the previous noisy sample x_t -> x_t-1 + scheduler_output = self.scheduler.step(torch.from_numpy(noise_pred), t, torch.from_numpy(latents)) + latents = scheduler_output.prev_sample.numpy() + + # call the callback, if provided + if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): + if callback is not None and i % callback_steps == 0: + callback(i, t, latents) + + if output_type == "latent": + image = latents + has_nsfw_concept = None + else: + latents /= self.vae_decoder.config.get("scaling_factor", 0.18215) + # it seems likes there is a strange result for using half-precision vae decoder if batchsize>1 + image = np.concatenate( + [self.vae_decoder(latent_sample=latents[i : i + 1])[0] for i in range(latents.shape[0])] + ) + image, has_nsfw_concept = self.run_safety_checker(image) + + if has_nsfw_concept is None: + do_denormalize = [True] * image.shape[0] + else: + do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept] + + image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize) + + if not return_dict: + return (image, has_nsfw_concept) + + return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) + + + # Adapted from https://github.com/huggingface/diffusers/blob/v0.22.0/src/diffusers/pipelines/latent_consistency/pipeline_latent_consistency.py#L264 + def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=None): + """ + See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 + + Args: + timesteps (`torch.Tensor`): + generate embedding vectors at these timesteps + embedding_dim (`int`, *optional*, defaults to 512): + dimension of the embeddings to generate + dtype: + data type of the generated embeddings + + Returns: + `torch.FloatTensor`: Embedding vectors with shape 
`(len(timesteps), embedding_dim)` + """ + w = w * 1000.0 + half_dim = embedding_dim // 2 + emb = np.log(10000.0) / (half_dim - 1) + emb = np.exp(np.arange(half_dim, dtype=dtype) * -emb) + emb = w[:, None] * emb[None, :] + emb = np.concatenate([np.sin(emb), np.cos(emb)], axis=1) + + if embedding_dim % 2 == 1: # zero pad + emb = np.pad(emb, [(0, 0), (0, 1)]) + + assert emb.shape == (w.shape[0], embedding_dim) + return emb From 8af29e884c1bcfc48808c76af04c1e18a4783f0a Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 19 Oct 2023 16:46:55 +0200 Subject: [PATCH 03/23] format --- optimum/onnxruntime/modeling_diffusion.py | 8 +++---- .../diffusers/pipeline_latent_consistency.py | 23 ++++++++++--------- optimum/utils/input_generators.py | 2 +- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 3e9adcccde..88e2339b64 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -47,7 +47,9 @@ from ..pipelines.diffusers.pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_xl import StableDiffusionXLPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_xl_img2img import StableDiffusionXLImg2ImgPipelineMixin -from ..pipelines.diffusers.pipeline_stable_diffusion_latent_consistency import LatentConsistencyModelPipelinePipelineMixin +from ..pipelines.diffusers.pipeline_stable_diffusion_latent_consistency import ( + LatentConsistencyModelPipelinePipelineMixin, +) from ..pipelines.diffusers.pipeline_utils import VaeImageProcessor from ..utils import ( DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER, @@ -565,18 +567,16 @@ class ORTStableDiffusionInpaintPipeline(ORTStableDiffusionPipelineBase, StableDi __call__ = StableDiffusionInpaintPipelineMixin.__call__ - @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTLatentConsistencyModelPipeline(ORTStableDiffusionPipelineBase, LatentConsistencyModelPipelinePipelineMixin): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). 
""" + auto_model_class = LatentConsistencyModelPipeline __call__ = LatentConsistencyModelPipelinePipelineMixin.__call__ - - class ORTStableDiffusionXLPipelineBase(ORTStableDiffusionPipelineBase): auto_model_class = StableDiffusionXLImg2ImgPipeline diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index b9287a178d..13bd165e37 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -28,12 +28,11 @@ class LatentConsistencyModelPipelinePipelineMixin(StableDiffusionPipelineMixin): - # Adapted from https://github.com/huggingface/diffusers/blob/v0.22.0/src/diffusers/pipelines/latent_consistency/pipeline_latent_consistency.py#L264 def __call__( self, prompt: Optional[Union[str, List[str]]] = None, - height: Optional[int] = 768, # TODO : default to None + height: Optional[int] = 768, # TODO : default to None width: Optional[int] = 768, num_inference_steps: int = 4, guidance_scale: float = 7.5, @@ -110,10 +109,9 @@ def __call__( height = height or self.unet.config.get("sample_size", 64) * self.vae_scale_factor width = width or self.unet.config.get("sample_size", 64) * self.vae_scale_factor - - # Don't need to get negative prompts due to LCM guided distillation - negative_prompt=None - negative_prompt_embeds=None + # Don't need to get negative prompts due to LCM guided distillation + negative_prompt = None + negative_prompt_embeds = None # check inputs. Raise error if not correct self.check_inputs( @@ -164,10 +162,14 @@ def __call__( num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order for i, t in enumerate(self.progress_bar(timesteps)): - # predict the noise residual timestep = np.full(bs, t, dtype=timestep_dtype) - noise_pred = self.unet(sample=latent_model_input, timestep=timestep, encoder_hidden_states=prompt_embeds, timestep_cond=w_embedding) + noise_pred = self.unet( + sample=latent_model_input, + timestep=timestep, + encoder_hidden_states=prompt_embeds, + timestep_cond=w_embedding, + ) noise_pred = noise_pred[0] # compute the previous noisy sample x_t -> x_t-1 @@ -202,12 +204,11 @@ def __call__( return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) - # Adapted from https://github.com/huggingface/diffusers/blob/v0.22.0/src/diffusers/pipelines/latent_consistency/pipeline_latent_consistency.py#L264 def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=None): """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 - + Args: timesteps (`torch.Tensor`): generate embedding vectors at these timesteps @@ -215,7 +216,7 @@ def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=None): dimension of the embeddings to generate dtype: data type of the generated embeddings - + Returns: `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` """ diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index 700aa4fccb..1028307f8c 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -710,7 +710,7 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int if input_name == "timestep": shape = [self.batch_size] return self.random_int_tensor(shape, max_value=self.vocab_size, framework=framework, dtype=int_dtype) - + if input_name == "text_embeds": dim = self.text_encoder_projection_dim elif 
input_name == "timestep_cond": From da9aaa52c550a848a3238483e1af136757052869 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 19 Oct 2023 16:50:22 +0200 Subject: [PATCH 04/23] fix docstring --- optimum/pipelines/diffusers/pipeline_latent_consistency.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index 13bd165e37..0f745466bd 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -69,9 +69,6 @@ def __call__( usually at the expense of lower image quality. num_images_per_prompt (`int`, defaults to 1): The number of images to generate per prompt. - eta (`float`, defaults to 0.0): - Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to - [`schedulers.DDIMScheduler`], will be ignored for others. generator (`Optional[np.random.RandomState]`, defaults to `None`):: A np.random.RandomState to make generation deterministic. latents (`Optional[np.ndarray]`, defaults to `None`): From 3411b84bdb2c305583205eedbe75e65d23110977 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 19 Oct 2023 18:58:47 +0200 Subject: [PATCH 05/23] fix --- optimum/onnxruntime/modeling_diffusion.py | 8 ++++---- .../diffusers/pipeline_latent_consistency.py | 11 +++++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 88e2339b64..24748c3b82 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -47,9 +47,7 @@ from ..pipelines.diffusers.pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_xl import StableDiffusionXLPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_xl_img2img import StableDiffusionXLImg2ImgPipelineMixin -from ..pipelines.diffusers.pipeline_stable_diffusion_latent_consistency import ( - LatentConsistencyModelPipelinePipelineMixin, -) +from ..pipelines.diffusers.pipeline_latent_consistency import LatentConsistencyModelPipelinePipelineMixin from ..pipelines.diffusers.pipeline_utils import VaeImageProcessor from ..utils import ( DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER, @@ -506,6 +504,7 @@ def forward( encoder_hidden_states: np.ndarray, text_embeds: Optional[np.ndarray] = None, time_ids: Optional[np.ndarray] = None, + timestep_cond: Optional[np.ndarray] = None, ): onnx_inputs = { "sample": sample, @@ -517,7 +516,8 @@ def forward( onnx_inputs["text_embeds"] = text_embeds if time_ids is not None: onnx_inputs["time_ids"] = time_ids - + if timestep_cond is not None: + onnx_inputs["timestep_cond"] = timestep_cond outputs = self.session.run(None, onnx_inputs) return outputs diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index 0f745466bd..6566b66732 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -129,7 +129,7 @@ def __call__( prompt_embeds = self._encode_prompt( prompt, num_images_per_prompt, - do_classifier_free_guidance, + False, # Don't need to get negative prompts due to LCM guided distillation negative_prompt, prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_prompt_embeds, @@ -151,7 +151,7 @@ def __call__( bs = 
batch_size * num_images_per_prompt # get Guidance Scale Embedding - w = np.full(bs, guidance_scale) + w = np.full(bs, guidance_scale, dtype=prompt_embeds.dtype) w_embedding = self.get_guidance_scale_embedding(w, embedding_dim=256, dtype=prompt_embeds.dtype) # Adapted from diffusers to extend it for other runtimes than ORT @@ -162,12 +162,11 @@ def __call__( # predict the noise residual timestep = np.full(bs, t, dtype=timestep_dtype) noise_pred = self.unet( - sample=latent_model_input, + sample=latents, timestep=timestep, encoder_hidden_states=prompt_embeds, timestep_cond=w_embedding, - ) - noise_pred = noise_pred[0] + )[0] # compute the previous noisy sample x_t -> x_t-1 scheduler_output = self.scheduler.step(torch.from_numpy(noise_pred), t, torch.from_numpy(latents)) @@ -217,7 +216,7 @@ def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=None): Returns: `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` """ - w = w * 1000.0 + w = w * 1000 half_dim = embedding_dim // 2 emb = np.log(10000.0) / (half_dim - 1) emb = np.exp(np.arange(half_dim, dtype=dtype) * -emb) From 2d0142d581c1ccb7965a891077500fb518231892 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 19 Oct 2023 18:59:49 +0200 Subject: [PATCH 06/23] format --- optimum/pipelines/diffusers/pipeline_latent_consistency.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index 6566b66732..b38c80ddab 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import inspect import logging from typing import Callable, List, Optional, Union @@ -20,7 +19,6 @@ import torch from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput -from .pipeline_utils import DiffusionPipelineMixin, rescale_noise_cfg from .pipeline_stable_diffusion import StableDiffusionPipelineMixin From 6c54062fa0df512af89162cf21a0428431180f80 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 20 Oct 2023 10:05:53 +0200 Subject: [PATCH 07/23] format --- optimum/onnxruntime/modeling_diffusion.py | 5 ++--- optimum/utils/import_utils.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 24748c3b82..872821ece3 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -25,13 +25,12 @@ import torch from diffusers import ( DDIMScheduler, + LatentConsistencyModelPipeline, LMSDiscreteScheduler, PNDMScheduler, StableDiffusionPipeline, StableDiffusionXLImg2ImgPipeline, - LatentConsistencyModelPipeline, ) - from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from diffusers.utils import CONFIG_NAME, is_invisible_watermark_available from huggingface_hub import snapshot_download @@ -42,12 +41,12 @@ from ..exporters.onnx import main_export from ..onnx.utils import _get_external_data_paths +from ..pipelines.diffusers.pipeline_latent_consistency import LatentConsistencyModelPipelinePipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion import StableDiffusionPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_xl import StableDiffusionXLPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_xl_img2img import StableDiffusionXLImg2ImgPipelineMixin -from ..pipelines.diffusers.pipeline_latent_consistency import LatentConsistencyModelPipelinePipelineMixin from ..pipelines.diffusers.pipeline_utils import VaeImageProcessor from ..utils import ( DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER, diff --git a/optimum/utils/import_utils.py b/optimum/utils/import_utils.py index ed310a43ea..7905b3f107 100644 --- a/optimum/utils/import_utils.py +++ b/optimum/utils/import_utils.py @@ -34,7 +34,7 @@ TORCH_MINIMUM_VERSION = packaging.version.parse("1.11.0") TRANSFORMERS_MINIMUM_VERSION = packaging.version.parse("4.25.0") -DIFFUSERS_MINIMUM_VERSION = packaging.version.parse("0.18.0") +DIFFUSERS_MINIMUM_VERSION = packaging.version.parse("0.22.0") AUTOGPTQ_MINIMUM_VERSION = packaging.version.parse("0.4.2") From 74b02d92f5649b4a221424664475a7842448867d Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 20 Oct 2023 14:42:47 +0200 Subject: [PATCH 08/23] modify regex pattern --- optimum/exporters/onnx/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index 1e5704e893..0ac7b1304b 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -435,7 +435,7 @@ def ordered_inputs(self, model: Union["PreTrainedModel", "TFPreTrainedModel"]) - sig = inspect.signature(model.call) for param in sig.parameters: - param_regex = re.compile(rf"{param}(\.\d*)?") + param_regex = re.compile(rf"{param}(\.\d*)?$") to_insert = [] for name, dynamic_axes in inputs.items(): if re.match(param_regex, 
name): From ec1da5126154065af6cb779ff50de5f992aeaf97 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 20 Oct 2023 16:14:45 +0200 Subject: [PATCH 09/23] remove constraint diffusers version --- optimum/onnxruntime/modeling_diffusion.py | 2 -- optimum/utils/import_utils.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 872821ece3..364f091e52 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -25,7 +25,6 @@ import torch from diffusers import ( DDIMScheduler, - LatentConsistencyModelPipeline, LMSDiscreteScheduler, PNDMScheduler, StableDiffusionPipeline, @@ -572,7 +571,6 @@ class ORTLatentConsistencyModelPipeline(ORTStableDiffusionPipelineBase, LatentCo ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). """ - auto_model_class = LatentConsistencyModelPipeline __call__ = LatentConsistencyModelPipelinePipelineMixin.__call__ diff --git a/optimum/utils/import_utils.py b/optimum/utils/import_utils.py index 7905b3f107..ed310a43ea 100644 --- a/optimum/utils/import_utils.py +++ b/optimum/utils/import_utils.py @@ -34,7 +34,7 @@ TORCH_MINIMUM_VERSION = packaging.version.parse("1.11.0") TRANSFORMERS_MINIMUM_VERSION = packaging.version.parse("4.25.0") -DIFFUSERS_MINIMUM_VERSION = packaging.version.parse("0.22.0") +DIFFUSERS_MINIMUM_VERSION = packaging.version.parse("0.18.0") AUTOGPTQ_MINIMUM_VERSION = packaging.version.parse("0.4.2") From 510db7edd157481300f87c306ae045b2e6246fc2 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 20 Oct 2023 16:17:15 +0200 Subject: [PATCH 10/23] fix typo --- optimum/onnxruntime/modeling_diffusion.py | 6 +++--- optimum/pipelines/diffusers/pipeline_latent_consistency.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 364f091e52..ce51d6a8fc 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -40,7 +40,7 @@ from ..exporters.onnx import main_export from ..onnx.utils import _get_external_data_paths -from ..pipelines.diffusers.pipeline_latent_consistency import LatentConsistencyModelPipelinePipelineMixin +from ..pipelines.diffusers.pipeline_latent_consistency import LatentConsistencyPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion import StableDiffusionPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipelineMixin @@ -566,12 +566,12 @@ class ORTStableDiffusionInpaintPipeline(ORTStableDiffusionPipelineBase, StableDi @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTLatentConsistencyModelPipeline(ORTStableDiffusionPipelineBase, LatentConsistencyModelPipelinePipelineMixin): +class ORTLatentConsistencyModelPipeline(ORTStableDiffusionPipelineBase, LatentConsistencyPipelineMixin): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). 
""" - __call__ = LatentConsistencyModelPipelinePipelineMixin.__call__ + __call__ = LatentConsistencyPipelineMixin.__call__ class ORTStableDiffusionXLPipelineBase(ORTStableDiffusionPipelineBase): diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index b38c80ddab..a8105e2921 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) -class LatentConsistencyModelPipelinePipelineMixin(StableDiffusionPipelineMixin): +class LatentConsistencyPipelineMixin(StableDiffusionPipelineMixin): # Adapted from https://github.com/huggingface/diffusers/blob/v0.22.0/src/diffusers/pipelines/latent_consistency/pipeline_latent_consistency.py#L264 def __call__( self, From d6cf152057e92e6c7330fbc89621ca931b4fcf3d Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 24 Oct 2023 11:35:11 +0200 Subject: [PATCH 11/23] fix regex --- optimum/exporters/onnx/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index 0ac7b1304b..b623d3bd22 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -435,7 +435,7 @@ def ordered_inputs(self, model: Union["PreTrainedModel", "TFPreTrainedModel"]) - sig = inspect.signature(model.call) for param in sig.parameters: - param_regex = re.compile(rf"{param}(\.\d*)?$") + param_regex = re.compile(rf"{param}(\..*)?$") to_insert = [] for name, dynamic_axes in inputs.items(): if re.match(param_regex, name): From af4f2e3fd6987e5283453e6aac9e52a1ac66f371 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 24 Oct 2023 16:59:41 +0200 Subject: [PATCH 12/23] fix pipeline --- .../diffusers/pipeline_latent_consistency.py | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index a8105e2921..7258ccb867 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -30,20 +30,19 @@ class LatentConsistencyPipelineMixin(StableDiffusionPipelineMixin): def __call__( self, prompt: Optional[Union[str, List[str]]] = None, - height: Optional[int] = 768, # TODO : default to None - width: Optional[int] = 768, + height: Optional[int] = None, + width: Optional[int] = None, num_inference_steps: int = 4, - guidance_scale: float = 7.5, + original_inference_steps: int = None, + guidance_scale: float = 8.5, num_images_per_prompt: int = 1, generator: Optional[np.random.RandomState] = None, latents: Optional[np.ndarray] = None, prompt_embeds: Optional[np.ndarray] = None, - lcm_origin_steps: int = 50, output_type: str = "pil", return_dict: bool = True, callback: Optional[Callable[[int, int, np.ndarray], None]] = None, callback_steps: int = 1, - guidance_rescale: float = 0.0, ): r""" Function invoked when calling the pipeline for generation. @@ -101,8 +100,8 @@ def __call__( list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" (nsfw) content, according to the `safety_checker`. 
""" - height = height or self.unet.config.get("sample_size", 64) * self.vae_scale_factor - width = width or self.unet.config.get("sample_size", 64) * self.vae_scale_factor + height = height or self.unet.config["sample_size"] * self.vae_scale_factor + width = width or self.unet.config["sample_size"]* self.vae_scale_factor # Don't need to get negative prompts due to LCM guided distillation negative_prompt = None @@ -127,19 +126,19 @@ def __call__( prompt_embeds = self._encode_prompt( prompt, num_images_per_prompt, - False, # Don't need to get negative prompts due to LCM guided distillation + False, negative_prompt, prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_prompt_embeds, ) # set timesteps - self.scheduler.set_timesteps(num_inference_steps, lcm_origin_steps) + self.scheduler.set_timesteps(num_inference_steps, original_inference_steps=original_inference_steps) timesteps = self.scheduler.timesteps latents = self.prepare_latents( batch_size * num_images_per_prompt, - self.unet.config.get("in_channels", 4), + self.unet.config["in_channels"], height, width, prompt_embeds.dtype, @@ -149,16 +148,15 @@ def __call__( bs = batch_size * num_images_per_prompt # get Guidance Scale Embedding - w = np.full(bs, guidance_scale, dtype=prompt_embeds.dtype) - w_embedding = self.get_guidance_scale_embedding(w, embedding_dim=256, dtype=prompt_embeds.dtype) + w = np.full(bs, guidance_scale - 1, dtype=prompt_embeds.dtype) + w_embedding = self.get_guidance_scale_embedding(w, embedding_dim=self.unet.config["time_cond_proj_dim"], dtype=prompt_embeds.dtype) # Adapted from diffusers to extend it for other runtimes than ORT timestep_dtype = self.unet.input_dtype.get("timestep", np.float32) num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order for i, t in enumerate(self.progress_bar(timesteps)): - # predict the noise residual - timestep = np.full(bs, t, dtype=timestep_dtype) + timestep = np.array([t], dtype=timestep_dtype) noise_pred = self.unet( sample=latents, timestep=timestep, @@ -169,6 +167,7 @@ def __call__( # compute the previous noisy sample x_t -> x_t-1 scheduler_output = self.scheduler.step(torch.from_numpy(noise_pred), t, torch.from_numpy(latents)) latents = scheduler_output.prev_sample.numpy() + denoised = scheduler_output.denoised.numpy() # call the callback, if provided if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): @@ -176,13 +175,13 @@ def __call__( callback(i, t, latents) if output_type == "latent": - image = latents + image = denoised has_nsfw_concept = None else: - latents /= self.vae_decoder.config.get("scaling_factor", 0.18215) + denoised /= self.vae_decoder.config["scaling_factor"] # it seems likes there is a strange result for using half-precision vae decoder if batchsize>1 image = np.concatenate( - [self.vae_decoder(latent_sample=latents[i : i + 1])[0] for i in range(latents.shape[0])] + [self.vae_decoder(latent_sample=denoised[i : i + 1])[0] for i in range(denoised.shape[0])] ) image, has_nsfw_concept = self.run_safety_checker(image) From a44ed5a9a468bca2730ac80e05555e124e8ba4da Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 24 Oct 2023 17:24:15 +0200 Subject: [PATCH 13/23] fix infered task --- optimum/exporters/tasks.py | 12 +++--------- .../diffusers/pipeline_latent_consistency.py | 5 ++--- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 7ff1649546..46ee59be34 100644 --- a/optimum/exporters/tasks.py +++ 
b/optimum/exporters/tasks.py @@ -187,7 +187,6 @@ class TasksManager: _DIFFUSERS_TASKS_TO_MODEL_LOADERS = { "stable-diffusion": "StableDiffusionPipeline", "stable-diffusion-xl": "StableDiffusionXLImg2ImgPipeline", - "stable-diffusion-latent-consistency": "LatentConsistencyModelPipeline", } _TIMM_TASKS_TO_MODEL_LOADERS = { @@ -1362,14 +1361,12 @@ def _infer_task_from_model_or_model_class( pt_auto_module = importlib.import_module("transformers.models.auto.modeling_auto") tf_auto_module = importlib.import_module("transformers.models.auto.modeling_tf_auto") - is_diffusers = model_class.config_name == "model_index.json" - for auto_cls_name, task in itertools.chain.from_iterable(iterable): if any( ( target_name.startswith("Auto"), target_name.startswith("TFAuto"), - is_diffusers, + "StableDiffusion" in target_name, ) ): if target_name == auto_cls_name: @@ -1411,11 +1408,8 @@ def _infer_task_from_model_name_or_path( ) model_info = huggingface_hub.model_info(model_name_or_path, revision=revision) if getattr(model_info, "library_name", None) == "diffusers": - # TODO : getattr(model_info, "model_index") defining auto_model_class_name currently set to None - for task in ("stable-diffusion-xl", "stable-diffusion", "stable-diffusion-latent-consistency"): - if task in model_info.tags: - inferred_task_name = task - break + class_name = model_info.config["diffusers"]["class_name"] + inferred_task_name = "stable-diffusion-xl" if "StableDiffusionXL" in class_name else "stable-diffusion" elif getattr(model_info, "library_name", None) == "timm": inferred_task_name = "image-classification" else: diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index 7258ccb867..8100981a8d 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -165,9 +165,8 @@ def __call__( )[0] # compute the previous noisy sample x_t -> x_t-1 - scheduler_output = self.scheduler.step(torch.from_numpy(noise_pred), t, torch.from_numpy(latents)) - latents = scheduler_output.prev_sample.numpy() - denoised = scheduler_output.denoised.numpy() + latents, denoised = self.scheduler.step(torch.from_numpy(noise_pred), t, torch.from_numpy(latents), return_dict=False) + latents, denoised = latents.numpy(), denoised.numpy() # call the callback, if provided if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): From ac878acb194467b4297d5d3fc87e8fbeed4da8b6 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 24 Oct 2023 17:26:03 +0200 Subject: [PATCH 14/23] style --- .../pipelines/diffusers/pipeline_latent_consistency.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index 8100981a8d..41c85b5b6a 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -101,7 +101,7 @@ def __call__( (nsfw) content, according to the `safety_checker`. 
""" height = height or self.unet.config["sample_size"] * self.vae_scale_factor - width = width or self.unet.config["sample_size"]* self.vae_scale_factor + width = width or self.unet.config["sample_size"] * self.vae_scale_factor # Don't need to get negative prompts due to LCM guided distillation negative_prompt = None @@ -149,7 +149,9 @@ def __call__( bs = batch_size * num_images_per_prompt # get Guidance Scale Embedding w = np.full(bs, guidance_scale - 1, dtype=prompt_embeds.dtype) - w_embedding = self.get_guidance_scale_embedding(w, embedding_dim=self.unet.config["time_cond_proj_dim"], dtype=prompt_embeds.dtype) + w_embedding = self.get_guidance_scale_embedding( + w, embedding_dim=self.unet.config["time_cond_proj_dim"], dtype=prompt_embeds.dtype + ) # Adapted from diffusers to extend it for other runtimes than ORT timestep_dtype = self.unet.input_dtype.get("timestep", np.float32) @@ -165,7 +167,9 @@ def __call__( )[0] # compute the previous noisy sample x_t -> x_t-1 - latents, denoised = self.scheduler.step(torch.from_numpy(noise_pred), t, torch.from_numpy(latents), return_dict=False) + latents, denoised = self.scheduler.step( + torch.from_numpy(noise_pred), t, torch.from_numpy(latents), return_dict=False + ) latents, denoised = latents.numpy(), denoised.numpy() # call the callback, if provided From 46dc6537f4c0ab60f658dc4349db76267ef51b51 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 25 Oct 2023 11:02:12 +0200 Subject: [PATCH 15/23] add test --- .github/workflows/test_onnxruntime.yml | 1 + optimum/onnxruntime/__init__.py | 4 + optimum/utils/dummy_diffusers_objects.py | 13 ++++ .../test_stable_diffusion_pipeline.py | 75 +++++++++++++++++-- tests/onnxruntime/utils_onnxruntime_tests.py | 1 + 5 files changed, 89 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml index 261235e6be..4bf0bf3c2f 100644 --- a/.github/workflows/test_onnxruntime.yml +++ b/.github/workflows/test_onnxruntime.yml @@ -29,6 +29,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | + pip install git+https://github.com/huggingface/diffusers pip install .[tests,onnxruntime] - name: Test with pytest working-directory: tests diff --git a/optimum/onnxruntime/__init__.py b/optimum/onnxruntime/__init__.py index c6bb12916a..e89c1579b1 100644 --- a/optimum/onnxruntime/__init__.py +++ b/optimum/onnxruntime/__init__.py @@ -78,6 +78,7 @@ "ORTStableDiffusionInpaintPipeline", "ORTStableDiffusionXLPipeline", "ORTStableDiffusionXLImg2ImgPipeline", + "ORTLatentConsistencyModelPipeline", ] else: _import_structure["modeling_diffusion"] = [ @@ -86,6 +87,7 @@ "ORTStableDiffusionInpaintPipeline", "ORTStableDiffusionXLPipeline", "ORTStableDiffusionXLImg2ImgPipeline", + "ORTLatentConsistencyModelPipeline", ] @@ -139,6 +141,7 @@ ORTStableDiffusionInpaintPipeline, ORTStableDiffusionPipeline, ORTStableDiffusionXLImg2ImgPipeline, + ORTLatentConsistencyModelPipeline, ORTStableDiffusionXLPipeline, ) else: @@ -147,6 +150,7 @@ ORTStableDiffusionInpaintPipeline, ORTStableDiffusionPipeline, ORTStableDiffusionXLImg2ImgPipeline, + ORTLatentConsistencyModelPipeline, ORTStableDiffusionXLPipeline, ) else: diff --git a/optimum/utils/dummy_diffusers_objects.py b/optimum/utils/dummy_diffusers_objects.py index f85a0987d4..da6edb279a 100644 --- a/optimum/utils/dummy_diffusers_objects.py +++ b/optimum/utils/dummy_diffusers_objects.py @@ -68,3 +68,16 @@ def __init__(self, *args, **kwargs): @classmethod def from_pretrained(cls, *args, **kwargs): 
requires_backends(cls, ["diffusers"]) + + + +class ORTLatentConsistencyModelPipeline(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + diff --git a/tests/onnxruntime/test_stable_diffusion_pipeline.py b/tests/onnxruntime/test_stable_diffusion_pipeline.py index 6deb32bea3..c1223b6cc3 100644 --- a/tests/onnxruntime/test_stable_diffusion_pipeline.py +++ b/tests/onnxruntime/test_stable_diffusion_pipeline.py @@ -25,23 +25,32 @@ StableDiffusionPipeline, StableDiffusionXLPipeline, ) +from packaging.version import Version, parse +from optimum.utils.import_utils import _diffusers_version from diffusers.utils import load_image from diffusers.utils.testing_utils import floats_tensor from parameterized import parameterized from transformers.testing_utils import require_torch_gpu from utils_onnxruntime_tests import MODEL_NAMES, SEED, ORTModelTestMixin -from optimum.onnxruntime import ORTStableDiffusionPipeline +from optimum.onnxruntime import ( + ORTStableDiffusionPipeline, + ORTStableDiffusionImg2ImgPipeline, + ORTStableDiffusionInpaintPipeline, + ORTStableDiffusionXLImg2ImgPipeline, + ORTStableDiffusionXLPipeline, + ORTLatentConsistencyModelPipeline, +) + + from optimum.onnxruntime.modeling_diffusion import ( ORTModelTextEncoder, ORTModelUnet, ORTModelVaeDecoder, ORTModelVaeEncoder, - ORTStableDiffusionImg2ImgPipeline, - ORTStableDiffusionInpaintPipeline, - ORTStableDiffusionXLImg2ImgPipeline, - ORTStableDiffusionXLPipeline, ) + + from optimum.pipelines.diffusers.pipeline_utils import VaeImageProcessor from optimum.utils.testing_utils import grid_parameters, require_diffusers @@ -483,3 +492,59 @@ def test_vae_image_processor_pil(self): in_np = np.array(i) out_np = to_np(out) if output_type == "pil" else (to_np(out) * 255).round() self.assertTrue(np.allclose(in_np, out_np, atol=1e-6)) + + +@unittest.skipIf(parse(_diffusers_version) <= Version("0.21.4"), "not supported with this diffusers version") +class ORTLatentConsistencyModelPipelineTest(ORTModelTestMixin): + SUPPORTED_ARCHITECTURES = [ + "latent-consistency", + ] + ORTMODEL_CLASS = ORTLatentConsistencyModelPipeline + TASK = "text-to-image" + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_compare_to_diffusers(self, model_arch: str): + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) + self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder) + self.assertIsInstance(ort_pipeline.vae_decoder, ORTModelVaeDecoder) + self.assertIsInstance(ort_pipeline.vae_encoder, ORTModelVaeEncoder) + self.assertIsInstance(ort_pipeline.unet, ORTModelUnet) + self.assertIsInstance(ort_pipeline.config, Dict) + + + from diffusers import LatentConsistencyModelPipeline + + pipeline = LatentConsistencyModelPipeline.from_pretrained(MODEL_NAMES[model_arch]) + batch_size, num_images_per_prompt, height, width = 2, 2, 64, 32 + latents = ort_pipeline.prepare_latents( + batch_size * num_images_per_prompt, + ort_pipeline.unet.config["in_channels"], + height, + width, + dtype=np.float32, + generator=np.random.RandomState(0), + ) + + kwargs = { + "prompt": ["sailing ship in storm by Leonardo da Vinci"] * batch_size, + "num_inference_steps": 1, + "num_images_per_prompt": num_images_per_prompt, + "height": height, + "width": width, + "guidance_scale": 8.5, + } + + for output_type in ["latent", "np"]: + ort_outputs = 
ort_pipeline(latents=latents, output_type=output_type, **kwargs).images + self.assertIsInstance(ort_outputs, np.ndarray) + with torch.no_grad(): + outputs = pipeline(latents=torch.from_numpy(latents), output_type=output_type, **kwargs).images + + # Compare model outputs + self.assertTrue(np.allclose(ort_outputs, outputs, atol=1e-4)) + # Compare model devices + self.assertEqual(pipeline.device, ort_pipeline.device) + + + diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index f8a1c36e2b..654e63c639 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -68,6 +68,7 @@ "hubert": "hf-internal-testing/tiny-random-HubertModel", "ibert": "hf-internal-testing/tiny-random-IBertModel", "levit": "hf-internal-testing/tiny-random-LevitModel", + "latent-consistency": "echarlaix/tiny-random-latent-consistency", "layoutlm": "hf-internal-testing/tiny-random-LayoutLMModel", "layoutlmv3": "hf-internal-testing/tiny-random-LayoutLMv3Model", "longt5": "hf-internal-testing/tiny-random-LongT5Model", From 4180d1b1364b892c8e44677be5e18b59cdcdbc95 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 25 Oct 2023 11:02:46 +0200 Subject: [PATCH 16/23] fix style --- optimum/utils/dummy_diffusers_objects.py | 2 -- tests/onnxruntime/test_stable_diffusion_pipeline.py | 4 ---- 2 files changed, 6 deletions(-) diff --git a/optimum/utils/dummy_diffusers_objects.py b/optimum/utils/dummy_diffusers_objects.py index da6edb279a..f6914bbcd3 100644 --- a/optimum/utils/dummy_diffusers_objects.py +++ b/optimum/utils/dummy_diffusers_objects.py @@ -70,7 +70,6 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["diffusers"]) - class ORTLatentConsistencyModelPipeline(metaclass=DummyObject): _backends = ["diffusers"] @@ -80,4 +79,3 @@ def __init__(self, *args, **kwargs): @classmethod def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["diffusers"]) - diff --git a/tests/onnxruntime/test_stable_diffusion_pipeline.py b/tests/onnxruntime/test_stable_diffusion_pipeline.py index c1223b6cc3..d0861544b3 100644 --- a/tests/onnxruntime/test_stable_diffusion_pipeline.py +++ b/tests/onnxruntime/test_stable_diffusion_pipeline.py @@ -512,7 +512,6 @@ def test_compare_to_diffusers(self, model_arch: str): self.assertIsInstance(ort_pipeline.unet, ORTModelUnet) self.assertIsInstance(ort_pipeline.config, Dict) - from diffusers import LatentConsistencyModelPipeline pipeline = LatentConsistencyModelPipeline.from_pretrained(MODEL_NAMES[model_arch]) @@ -545,6 +544,3 @@ def test_compare_to_diffusers(self, model_arch: str): self.assertTrue(np.allclose(ort_outputs, outputs, atol=1e-4)) # Compare model devices self.assertEqual(pipeline.device, ort_pipeline.device) - - - From 8d4069c8645f68f7c5c1ea4c0585bac7b9875b80 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 25 Oct 2023 11:04:11 +0200 Subject: [PATCH 17/23] fix style --- optimum/onnxruntime/__init__.py | 4 ++-- tests/onnxruntime/test_stable_diffusion_pipeline.py | 12 ++++-------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/optimum/onnxruntime/__init__.py b/optimum/onnxruntime/__init__.py index e89c1579b1..f1d4f63a9f 100644 --- a/optimum/onnxruntime/__init__.py +++ b/optimum/onnxruntime/__init__.py @@ -137,20 +137,20 @@ raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: from ..utils.dummy_diffusers_objects import ( + ORTLatentConsistencyModelPipeline, ORTStableDiffusionImg2ImgPipeline, 
ORTStableDiffusionInpaintPipeline, ORTStableDiffusionPipeline, ORTStableDiffusionXLImg2ImgPipeline, - ORTLatentConsistencyModelPipeline, ORTStableDiffusionXLPipeline, ) else: from .modeling_diffusion import ( + ORTLatentConsistencyModelPipeline, ORTStableDiffusionImg2ImgPipeline, ORTStableDiffusionInpaintPipeline, ORTStableDiffusionPipeline, ORTStableDiffusionXLImg2ImgPipeline, - ORTLatentConsistencyModelPipeline, ORTStableDiffusionXLPipeline, ) else: diff --git a/tests/onnxruntime/test_stable_diffusion_pipeline.py b/tests/onnxruntime/test_stable_diffusion_pipeline.py index d0861544b3..cdd071e649 100644 --- a/tests/onnxruntime/test_stable_diffusion_pipeline.py +++ b/tests/onnxruntime/test_stable_diffusion_pipeline.py @@ -25,33 +25,29 @@ StableDiffusionPipeline, StableDiffusionXLPipeline, ) -from packaging.version import Version, parse -from optimum.utils.import_utils import _diffusers_version from diffusers.utils import load_image from diffusers.utils.testing_utils import floats_tensor +from packaging.version import Version, parse from parameterized import parameterized from transformers.testing_utils import require_torch_gpu from utils_onnxruntime_tests import MODEL_NAMES, SEED, ORTModelTestMixin from optimum.onnxruntime import ( - ORTStableDiffusionPipeline, + ORTLatentConsistencyModelPipeline, ORTStableDiffusionImg2ImgPipeline, ORTStableDiffusionInpaintPipeline, + ORTStableDiffusionPipeline, ORTStableDiffusionXLImg2ImgPipeline, ORTStableDiffusionXLPipeline, - ORTLatentConsistencyModelPipeline, ) - - from optimum.onnxruntime.modeling_diffusion import ( ORTModelTextEncoder, ORTModelUnet, ORTModelVaeDecoder, ORTModelVaeEncoder, ) - - from optimum.pipelines.diffusers.pipeline_utils import VaeImageProcessor +from optimum.utils.import_utils import _diffusers_version from optimum.utils.testing_utils import grid_parameters, require_diffusers From 824fc5702532eed226c139fdeba897be9cb7be16 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 25 Oct 2023 13:12:54 +0200 Subject: [PATCH 18/23] add documentation --- .../package_reference/modeling_ort.mdx | 5 +++++ docs/source/onnxruntime/usage_guides/models.mdx | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/docs/source/onnxruntime/package_reference/modeling_ort.mdx b/docs/source/onnxruntime/package_reference/modeling_ort.mdx index 47c961791c..65b2b60195 100644 --- a/docs/source/onnxruntime/package_reference/modeling_ort.mdx +++ b/docs/source/onnxruntime/package_reference/modeling_ort.mdx @@ -143,3 +143,8 @@ The following ORT classes are available for the following custom tasks. 
 [[autodoc]] onnxruntime.ORTStableDiffusionXLImg2ImgPipeline
     - __call__
+
+#### ORTLatentConsistencyModelPipeline
+
+[[autodoc]] onnxruntime.ORTLatentConsistencyModelPipeline
+    - __call__
diff --git a/docs/source/onnxruntime/usage_guides/models.mdx b/docs/source/onnxruntime/usage_guides/models.mdx
index b7e8549e8e..131822e956 100644
--- a/docs/source/onnxruntime/usage_guides/models.mdx
+++ b/docs/source/onnxruntime/usage_guides/models.mdx
@@ -248,3 +248,19 @@ image = refiner(prompt=prompt, image=image[None, :]).images[0]
 image.save("sailing_ship.png")
 ```
+
+
+## Latent Consistency Models
+
+### Text-to-Image
+
+Here is an example of how you can load a Latent Consistency Model (LCM) from [SimianLuo/LCM_Dreamshaper_v7](https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7) and run inference using ONNX Runtime:
+
+```python
+from optimum.onnxruntime import ORTLatentConsistencyModelPipeline
+
+model_id = "SimianLuo/LCM_Dreamshaper_v7"
+pipeline = ORTLatentConsistencyModelPipeline.from_pretrained(model_id, export=True)
+prompt = "sailing ship in storm by Leonardo da Vinci"
+images = pipeline(prompt, num_inference_steps=4, guidance_scale=8.0).images
+```

From 683b39c049ba5145f1629e40ea640b1c719b4c37 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 25 Oct 2023 15:23:55 +0200
Subject: [PATCH 19/23] fix

---
 tests/onnxruntime/test_stable_diffusion_pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/onnxruntime/test_stable_diffusion_pipeline.py b/tests/onnxruntime/test_stable_diffusion_pipeline.py
index cdd071e649..40c00a4685 100644
--- a/tests/onnxruntime/test_stable_diffusion_pipeline.py
+++ b/tests/onnxruntime/test_stable_diffusion_pipeline.py
@@ -490,7 +490,6 @@ def test_vae_image_processor_pil(self):
             self.assertTrue(np.allclose(in_np, out_np, atol=1e-6))
 
 
-@unittest.skipIf(parse(_diffusers_version) <= Version("0.21.4"), "not supported with this diffusers version")
 class ORTLatentConsistencyModelPipelineTest(ORTModelTestMixin):
     SUPPORTED_ARCHITECTURES = [
         "latent-consistency",
@@ -500,6 +499,7 @@ class ORTLatentConsistencyModelPipelineTest(ORTModelTestMixin):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_diffusers
+    @unittest.skipIf(parse(_diffusers_version) <= Version("0.21.4"), "not supported with this diffusers version")
     def test_compare_to_diffusers(self, model_arch: str):
         ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True)
         self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder)

From 1604240a1b40bdb78a7e3b193ed2adc9d9727507 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 30 Oct 2023 13:47:30 +0100
Subject: [PATCH 20/23] add precision for diffusers min version

---
 tests/onnxruntime/test_stable_diffusion_pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/onnxruntime/test_stable_diffusion_pipeline.py b/tests/onnxruntime/test_stable_diffusion_pipeline.py
index 40c00a4685..c0c25ae279 100644
--- a/tests/onnxruntime/test_stable_diffusion_pipeline.py
+++ b/tests/onnxruntime/test_stable_diffusion_pipeline.py
@@ -499,7 +499,7 @@ class ORTLatentConsistencyModelPipelineTest(ORTModelTestMixin):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_diffusers
-    @unittest.skipIf(parse(_diffusers_version) <= Version("0.21.4"), "not supported with this diffusers version")
+    @unittest.skipIf(parse(_diffusers_version) <= Version("0.21.4"), "not supported with this diffusers version, needs diffusers>=v0.22.0")
     def test_compare_to_diffusers(self, model_arch: str):
        ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True)
         self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder)

From 583ba148e0c64f08409257aa25dfc9e36bdc3c12 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 30 Oct 2023 13:51:54 +0100
Subject: [PATCH 21/23] move import

---
 tests/onnxruntime/test_stable_diffusion_pipeline.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tests/onnxruntime/test_stable_diffusion_pipeline.py b/tests/onnxruntime/test_stable_diffusion_pipeline.py
index c0c25ae279..3b89d5949c 100644
--- a/tests/onnxruntime/test_stable_diffusion_pipeline.py
+++ b/tests/onnxruntime/test_stable_diffusion_pipeline.py
@@ -50,6 +50,9 @@
 from optimum.utils.import_utils import _diffusers_version
 from optimum.utils.testing_utils import grid_parameters, require_diffusers
 
+if parse(_diffusers_version) > Version("0.21.4"):
+    from diffusers import LatentConsistencyModelPipeline
+
 
 def _generate_inputs(batch_size=1):
     inputs = {
@@ -499,7 +502,10 @@ class ORTLatentConsistencyModelPipelineTest(ORTModelTestMixin):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_diffusers
-    @unittest.skipIf(parse(_diffusers_version) <= Version("0.21.4"), "not supported with this diffusers version, needs diffusers>=v0.22.0")
+    @unittest.skipIf(
+        parse(_diffusers_version) <= Version("0.21.4"),
+        "not supported with this diffusers version, needs diffusers>=v0.22.0",
+    )
     def test_compare_to_diffusers(self, model_arch: str):
         ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True)
         self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder)
@@ -508,8 +514,6 @@ def test_compare_to_diffusers(self, model_arch: str):
         self.assertIsInstance(ort_pipeline.unet, ORTModelUnet)
         self.assertIsInstance(ort_pipeline.config, Dict)
 
-        from diffusers import LatentConsistencyModelPipeline
-
         pipeline = LatentConsistencyModelPipeline.from_pretrained(MODEL_NAMES[model_arch])
         batch_size, num_images_per_prompt, height, width = 2, 2, 64, 32
         latents = ort_pipeline.prepare_latents(

From f96881034e9dbf53330915dc74da831c763112dc Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 30 Oct 2023 13:52:07 +0100
Subject: [PATCH 22/23] rm install from source

---
 .github/workflows/test_onnxruntime.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml
index 4bf0bf3c2f..261235e6be 100644
--- a/.github/workflows/test_onnxruntime.yml
+++ b/.github/workflows/test_onnxruntime.yml
@@ -29,7 +29,6 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
         run: |
-          pip install git+https://github.com/huggingface/diffusers
           pip install .[tests,onnxruntime]
       - name: Test with pytest
         working-directory: tests

From d723e4b2a41006b403c1113bc9937e7be8dd5fd4 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 30 Oct 2023 13:52:33 +0100
Subject: [PATCH 23/23] format

---
 tests/onnxruntime/test_stable_diffusion_pipeline.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/onnxruntime/test_stable_diffusion_pipeline.py b/tests/onnxruntime/test_stable_diffusion_pipeline.py
index 3b89d5949c..0f166af290 100644
--- a/tests/onnxruntime/test_stable_diffusion_pipeline.py
+++ b/tests/onnxruntime/test_stable_diffusion_pipeline.py
@@ -50,6 +50,7 @@
 from optimum.utils.import_utils import _diffusers_version
 from optimum.utils.testing_utils import grid_parameters, require_diffusers
 
+
 if parse(_diffusers_version) > Version("0.21.4"):
     from diffusers import LatentConsistencyModelPipeline
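
Taken together, patches 19-23 converge on one pattern for handling the new pipeline class: `LatentConsistencyModelPipeline` only exists in diffusers v0.22.0 and later, so every import of it is gated on the installed diffusers version instead of pinning CI to a source install of diffusers. Below is a minimal sketch of that guard pattern as downstream code might reuse it; the `ImportError` fallback is an illustrative assumption of this sketch, not part of the patches (the test suite simply skips the affected tests via `unittest.skipIf`):

```python
# Sketch of the version-guard pattern the patches above converge on:
# LatentConsistencyModelPipeline is only available in diffusers >= 0.22.0,
# so gate the import on the installed diffusers version.
from packaging.version import Version, parse

from optimum.utils.import_utils import _diffusers_version

if parse(_diffusers_version) > Version("0.21.4"):
    from diffusers import LatentConsistencyModelPipeline
else:
    # Hypothetical fallback for downstream code; the patches instead mark
    # the LCM tests as skipped on older diffusers releases.
    raise ImportError("LatentConsistencyModelPipeline requires diffusers>=0.22.0")
```

Using `<= Version("0.21.4")` for the skip condition and the strict `>` for the import keeps both sides of the guard consistent with a single boundary version.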