From ad93b7f619451f3b687b0f66df4582d83c93bc38 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 19 Oct 2023 16:46:00 +0200 Subject: [PATCH 01/23] Enable export latent consistency model --- optimum/exporters/onnx/model_configs.py | 2 ++ optimum/exporters/tasks.py | 7 +++++-- optimum/onnxruntime/modeling_diffusion.py | 15 +++++++++++++++ optimum/utils/input_generators.py | 14 +++++++++++--- 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index e1461c2a0c..5b1268f3c6 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -928,6 +928,8 @@ def inputs(self) -> Dict[str, Dict[int, str]]: common_inputs["text_embeds"] = {0: "batch_size"} common_inputs["time_ids"] = {0: "batch_size"} + if getattr(self._normalized_config, "time_cond_proj_dim", None) is not None: + common_inputs["timestep_cond"] = {0: "batch_size"} return common_inputs @property diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 2a0f9076ce..7ff1649546 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -187,6 +187,7 @@ class TasksManager: _DIFFUSERS_TASKS_TO_MODEL_LOADERS = { "stable-diffusion": "StableDiffusionPipeline", "stable-diffusion-xl": "StableDiffusionXLImg2ImgPipeline", + "stable-diffusion-latent-consistency": "LatentConsistencyModelPipeline", } _TIMM_TASKS_TO_MODEL_LOADERS = { @@ -1361,12 +1362,14 @@ def _infer_task_from_model_or_model_class( pt_auto_module = importlib.import_module("transformers.models.auto.modeling_auto") tf_auto_module = importlib.import_module("transformers.models.auto.modeling_tf_auto") + is_diffusers = model_class.config_name == "model_index.json" + for auto_cls_name, task in itertools.chain.from_iterable(iterable): if any( ( target_name.startswith("Auto"), target_name.startswith("TFAuto"), - "StableDiffusion" in target_name, + is_diffusers, ) ): if target_name == auto_cls_name: @@ -1409,7 +1412,7 @@ def _infer_task_from_model_name_or_path( model_info = huggingface_hub.model_info(model_name_or_path, revision=revision) if getattr(model_info, "library_name", None) == "diffusers": # TODO : getattr(model_info, "model_index") defining auto_model_class_name currently set to None - for task in ("stable-diffusion-xl", "stable-diffusion"): + for task in ("stable-diffusion-xl", "stable-diffusion", "stable-diffusion-latent-consistency"): if task in model_info.tags: inferred_task_name = task break diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index bc0d17a93a..3e9adcccde 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -29,7 +29,9 @@ PNDMScheduler, StableDiffusionPipeline, StableDiffusionXLImg2ImgPipeline, + LatentConsistencyModelPipeline, ) + from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from diffusers.utils import CONFIG_NAME, is_invisible_watermark_available from huggingface_hub import snapshot_download @@ -45,6 +47,7 @@ from ..pipelines.diffusers.pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_xl import StableDiffusionXLPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_xl_img2img import StableDiffusionXLImg2ImgPipelineMixin +from ..pipelines.diffusers.pipeline_stable_diffusion_latent_consistency import LatentConsistencyModelPipelinePipelineMixin from 
..pipelines.diffusers.pipeline_utils import VaeImageProcessor from ..utils import ( DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER, @@ -562,6 +565,18 @@ class ORTStableDiffusionInpaintPipeline(ORTStableDiffusionPipelineBase, StableDi __call__ = StableDiffusionInpaintPipelineMixin.__call__ + +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) +class ORTLatentConsistencyModelPipeline(ORTStableDiffusionPipelineBase, LatentConsistencyModelPipelinePipelineMixin): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). + """ + auto_model_class = LatentConsistencyModelPipeline + __call__ = LatentConsistencyModelPipelinePipelineMixin.__call__ + + + + class ORTStableDiffusionXLPipelineBase(ORTStableDiffusionPipelineBase): auto_model_class = StableDiffusionXLImg2ImgPipeline diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index 2073344395..700aa4fccb 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -684,6 +684,7 @@ class DummyTimestepInputGenerator(DummyInputGenerator): "timestep", "text_embeds", "time_ids", + "timestep_cond", ) def __init__( @@ -703,14 +704,21 @@ def __init__( self.batch_size = random.randint(low, high) else: self.batch_size = batch_size + self.time_cond_proj_dim = normalized_config.config.time_cond_proj_dim def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): - shape = [self.batch_size] - if input_name == "timestep": + shape = [self.batch_size] return self.random_int_tensor(shape, max_value=self.vocab_size, framework=framework, dtype=int_dtype) + + if input_name == "text_embeds": + dim = self.text_encoder_projection_dim + elif input_name == "timestep_cond": + dim = self.time_cond_proj_dim + else: + dim = self.time_ids - shape.append(self.text_encoder_projection_dim if input_name == "text_embeds" else self.time_ids) + shape = [self.batch_size, dim] return self.random_float_tensor(shape, max_value=self.vocab_size, framework=framework, dtype=float_dtype) From 057e5762b882c19febaab10cfd46865db276d58e Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 19 Oct 2023 16:46:24 +0200 Subject: [PATCH 02/23] add pipeline --- .../diffusers/pipeline_latent_consistency.py | 233 ++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 optimum/pipelines/diffusers/pipeline_latent_consistency.py diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py new file mode 100644 index 0000000000..b9287a178d --- /dev/null +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -0,0 +1,233 @@ +# Copyright 2023 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import inspect +import logging +from typing import Callable, List, Optional, Union + +import numpy as np +import torch +from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput + +from .pipeline_utils import DiffusionPipelineMixin, rescale_noise_cfg +from .pipeline_stable_diffusion import StableDiffusionPipelineMixin + + +logger = logging.getLogger(__name__) + + +class LatentConsistencyModelPipelinePipelineMixin(StableDiffusionPipelineMixin): + + # Adapted from https://github.com/huggingface/diffusers/blob/v0.22.0/src/diffusers/pipelines/latent_consistency/pipeline_latent_consistency.py#L264 + def __call__( + self, + prompt: Optional[Union[str, List[str]]] = None, + height: Optional[int] = 768, # TODO : default to None + width: Optional[int] = 768, + num_inference_steps: int = 4, + guidance_scale: float = 7.5, + num_images_per_prompt: int = 1, + generator: Optional[np.random.RandomState] = None, + latents: Optional[np.ndarray] = None, + prompt_embeds: Optional[np.ndarray] = None, + lcm_origin_steps: int = 50, + output_type: str = "pil", + return_dict: bool = True, + callback: Optional[Callable[[int, int, np.ndarray], None]] = None, + callback_steps: int = 1, + guidance_rescale: float = 0.0, + ): + r""" + Function invoked when calling the pipeline for generation. + + Args: + prompt (`Optional[Union[str, List[str]]]`, defaults to None): + The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. + instead. + height (`Optional[int]`, defaults to None): + The height in pixels of the generated image. + width (`Optional[int]`, defaults to None): + The width in pixels of the generated image. + num_inference_steps (`int`, defaults to 50): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. + guidance_scale (`float`, defaults to 7.5): + Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). + `guidance_scale` is defined as `w` of equation 2. of [Imagen + Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > + 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, + usually at the expense of lower image quality. + num_images_per_prompt (`int`, defaults to 1): + The number of images to generate per prompt. + eta (`float`, defaults to 0.0): + Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to + [`schedulers.DDIMScheduler`], will be ignored for others. + generator (`Optional[np.random.RandomState]`, defaults to `None`):: + A np.random.RandomState to make generation deterministic. + latents (`Optional[np.ndarray]`, defaults to `None`): + Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image + generation. Can be used to tweak the same generation with different prompts. If not provided, a latents + tensor will ge generated by sampling using the supplied random `generator`. + prompt_embeds (`Optional[np.ndarray]`, defaults to `None`): + Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not + provided, text embeddings will be generated from `prompt` input argument. + output_type (`str`, defaults to `"pil"`): + The output format of the generate image. Choose between + [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. 
+ return_dict (`bool`, defaults to `True`): + Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a + plain tuple. + callback (Optional[Callable], defaults to `None`): + A function that will be called every `callback_steps` steps during inference. The function will be + called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. + callback_steps (`int`, defaults to 1): + The frequency at which the `callback` function will be called. If not specified, the callback will be + called at every step. + guidance_rescale (`float`, defaults to 0.0): + Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are + Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of + [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). + Guidance rescale factor should fix overexposure when using zero terminal SNR. + + Returns: + [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: + [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple. + When returning a tuple, the first element is a list with the generated images, and the second element is a + list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" + (nsfw) content, according to the `safety_checker`. + """ + height = height or self.unet.config.get("sample_size", 64) * self.vae_scale_factor + width = width or self.unet.config.get("sample_size", 64) * self.vae_scale_factor + + + # Don't need to get negative prompts due to LCM guided distillation + negative_prompt=None + negative_prompt_embeds=None + + # check inputs. 
Raise error if not correct + self.check_inputs( + prompt, height, width, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds + ) + + # define call parameters + if isinstance(prompt, str): + batch_size = 1 + elif isinstance(prompt, list): + batch_size = len(prompt) + else: + batch_size = prompt_embeds.shape[0] + + if generator is None: + generator = np.random + + prompt_embeds = self._encode_prompt( + prompt, + num_images_per_prompt, + do_classifier_free_guidance, + negative_prompt, + prompt_embeds=prompt_embeds, + negative_prompt_embeds=negative_prompt_embeds, + ) + + # set timesteps + self.scheduler.set_timesteps(num_inference_steps, lcm_origin_steps) + timesteps = self.scheduler.timesteps + + latents = self.prepare_latents( + batch_size * num_images_per_prompt, + self.unet.config.get("in_channels", 4), + height, + width, + prompt_embeds.dtype, + generator, + latents, + ) + + bs = batch_size * num_images_per_prompt + # get Guidance Scale Embedding + w = np.full(bs, guidance_scale) + w_embedding = self.get_guidance_scale_embedding(w, embedding_dim=256, dtype=prompt_embeds.dtype) + + # Adapted from diffusers to extend it for other runtimes than ORT + timestep_dtype = self.unet.input_dtype.get("timestep", np.float32) + + num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order + for i, t in enumerate(self.progress_bar(timesteps)): + + # predict the noise residual + timestep = np.full(bs, t, dtype=timestep_dtype) + noise_pred = self.unet(sample=latent_model_input, timestep=timestep, encoder_hidden_states=prompt_embeds, timestep_cond=w_embedding) + noise_pred = noise_pred[0] + + # compute the previous noisy sample x_t -> x_t-1 + scheduler_output = self.scheduler.step(torch.from_numpy(noise_pred), t, torch.from_numpy(latents)) + latents = scheduler_output.prev_sample.numpy() + + # call the callback, if provided + if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): + if callback is not None and i % callback_steps == 0: + callback(i, t, latents) + + if output_type == "latent": + image = latents + has_nsfw_concept = None + else: + latents /= self.vae_decoder.config.get("scaling_factor", 0.18215) + # it seems likes there is a strange result for using half-precision vae decoder if batchsize>1 + image = np.concatenate( + [self.vae_decoder(latent_sample=latents[i : i + 1])[0] for i in range(latents.shape[0])] + ) + image, has_nsfw_concept = self.run_safety_checker(image) + + if has_nsfw_concept is None: + do_denormalize = [True] * image.shape[0] + else: + do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept] + + image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize) + + if not return_dict: + return (image, has_nsfw_concept) + + return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) + + + # Adapted from https://github.com/huggingface/diffusers/blob/v0.22.0/src/diffusers/pipelines/latent_consistency/pipeline_latent_consistency.py#L264 + def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=None): + """ + See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 + + Args: + timesteps (`torch.Tensor`): + generate embedding vectors at these timesteps + embedding_dim (`int`, *optional*, defaults to 512): + dimension of the embeddings to generate + dtype: + data type of the generated embeddings + + Returns: + `torch.FloatTensor`: Embedding vectors with shape 
`(len(timesteps), embedding_dim)` + """ + w = w * 1000.0 + half_dim = embedding_dim // 2 + emb = np.log(10000.0) / (half_dim - 1) + emb = np.exp(np.arange(half_dim, dtype=dtype) * -emb) + emb = w[:, None] * emb[None, :] + emb = np.concatenate([np.sin(emb), np.cos(emb)], axis=1) + + if embedding_dim % 2 == 1: # zero pad + emb = np.pad(emb, [(0, 0), (0, 1)]) + + assert emb.shape == (w.shape[0], embedding_dim) + return emb From 8af29e884c1bcfc48808c76af04c1e18a4783f0a Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 19 Oct 2023 16:46:55 +0200 Subject: [PATCH 03/23] format --- optimum/onnxruntime/modeling_diffusion.py | 8 +++---- .../diffusers/pipeline_latent_consistency.py | 23 ++++++++++--------- optimum/utils/input_generators.py | 2 +- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 3e9adcccde..88e2339b64 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -47,7 +47,9 @@ from ..pipelines.diffusers.pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_xl import StableDiffusionXLPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_xl_img2img import StableDiffusionXLImg2ImgPipelineMixin -from ..pipelines.diffusers.pipeline_stable_diffusion_latent_consistency import LatentConsistencyModelPipelinePipelineMixin +from ..pipelines.diffusers.pipeline_stable_diffusion_latent_consistency import ( + LatentConsistencyModelPipelinePipelineMixin, +) from ..pipelines.diffusers.pipeline_utils import VaeImageProcessor from ..utils import ( DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER, @@ -565,18 +567,16 @@ class ORTStableDiffusionInpaintPipeline(ORTStableDiffusionPipelineBase, StableDi __call__ = StableDiffusionInpaintPipelineMixin.__call__ - @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTLatentConsistencyModelPipeline(ORTStableDiffusionPipelineBase, LatentConsistencyModelPipelinePipelineMixin): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). 
""" + auto_model_class = LatentConsistencyModelPipeline __call__ = LatentConsistencyModelPipelinePipelineMixin.__call__ - - class ORTStableDiffusionXLPipelineBase(ORTStableDiffusionPipelineBase): auto_model_class = StableDiffusionXLImg2ImgPipeline diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index b9287a178d..13bd165e37 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -28,12 +28,11 @@ class LatentConsistencyModelPipelinePipelineMixin(StableDiffusionPipelineMixin): - # Adapted from https://github.com/huggingface/diffusers/blob/v0.22.0/src/diffusers/pipelines/latent_consistency/pipeline_latent_consistency.py#L264 def __call__( self, prompt: Optional[Union[str, List[str]]] = None, - height: Optional[int] = 768, # TODO : default to None + height: Optional[int] = 768, # TODO : default to None width: Optional[int] = 768, num_inference_steps: int = 4, guidance_scale: float = 7.5, @@ -110,10 +109,9 @@ def __call__( height = height or self.unet.config.get("sample_size", 64) * self.vae_scale_factor width = width or self.unet.config.get("sample_size", 64) * self.vae_scale_factor - - # Don't need to get negative prompts due to LCM guided distillation - negative_prompt=None - negative_prompt_embeds=None + # Don't need to get negative prompts due to LCM guided distillation + negative_prompt = None + negative_prompt_embeds = None # check inputs. Raise error if not correct self.check_inputs( @@ -164,10 +162,14 @@ def __call__( num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order for i, t in enumerate(self.progress_bar(timesteps)): - # predict the noise residual timestep = np.full(bs, t, dtype=timestep_dtype) - noise_pred = self.unet(sample=latent_model_input, timestep=timestep, encoder_hidden_states=prompt_embeds, timestep_cond=w_embedding) + noise_pred = self.unet( + sample=latent_model_input, + timestep=timestep, + encoder_hidden_states=prompt_embeds, + timestep_cond=w_embedding, + ) noise_pred = noise_pred[0] # compute the previous noisy sample x_t -> x_t-1 @@ -202,12 +204,11 @@ def __call__( return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) - # Adapted from https://github.com/huggingface/diffusers/blob/v0.22.0/src/diffusers/pipelines/latent_consistency/pipeline_latent_consistency.py#L264 def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=None): """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 - + Args: timesteps (`torch.Tensor`): generate embedding vectors at these timesteps @@ -215,7 +216,7 @@ def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=None): dimension of the embeddings to generate dtype: data type of the generated embeddings - + Returns: `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` """ diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index 700aa4fccb..1028307f8c 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -710,7 +710,7 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int if input_name == "timestep": shape = [self.batch_size] return self.random_int_tensor(shape, max_value=self.vocab_size, framework=framework, dtype=int_dtype) - + if input_name == "text_embeds": dim = self.text_encoder_projection_dim elif 
input_name == "timestep_cond": From da9aaa52c550a848a3238483e1af136757052869 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 19 Oct 2023 16:50:22 +0200 Subject: [PATCH 04/23] fix docstring --- optimum/pipelines/diffusers/pipeline_latent_consistency.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index 13bd165e37..0f745466bd 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -69,9 +69,6 @@ def __call__( usually at the expense of lower image quality. num_images_per_prompt (`int`, defaults to 1): The number of images to generate per prompt. - eta (`float`, defaults to 0.0): - Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to - [`schedulers.DDIMScheduler`], will be ignored for others. generator (`Optional[np.random.RandomState]`, defaults to `None`):: A np.random.RandomState to make generation deterministic. latents (`Optional[np.ndarray]`, defaults to `None`): From 3411b84bdb2c305583205eedbe75e65d23110977 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 19 Oct 2023 18:58:47 +0200 Subject: [PATCH 05/23] fix --- optimum/onnxruntime/modeling_diffusion.py | 8 ++++---- .../diffusers/pipeline_latent_consistency.py | 11 +++++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 88e2339b64..24748c3b82 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -47,9 +47,7 @@ from ..pipelines.diffusers.pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_xl import StableDiffusionXLPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_xl_img2img import StableDiffusionXLImg2ImgPipelineMixin -from ..pipelines.diffusers.pipeline_stable_diffusion_latent_consistency import ( - LatentConsistencyModelPipelinePipelineMixin, -) +from ..pipelines.diffusers.pipeline_latent_consistency import LatentConsistencyModelPipelinePipelineMixin from ..pipelines.diffusers.pipeline_utils import VaeImageProcessor from ..utils import ( DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER, @@ -506,6 +504,7 @@ def forward( encoder_hidden_states: np.ndarray, text_embeds: Optional[np.ndarray] = None, time_ids: Optional[np.ndarray] = None, + timestep_cond: Optional[np.ndarray] = None, ): onnx_inputs = { "sample": sample, @@ -517,7 +516,8 @@ def forward( onnx_inputs["text_embeds"] = text_embeds if time_ids is not None: onnx_inputs["time_ids"] = time_ids - + if timestep_cond is not None: + onnx_inputs["timestep_cond"] = timestep_cond outputs = self.session.run(None, onnx_inputs) return outputs diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index 0f745466bd..6566b66732 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -129,7 +129,7 @@ def __call__( prompt_embeds = self._encode_prompt( prompt, num_images_per_prompt, - do_classifier_free_guidance, + False, # Don't need to get negative prompts due to LCM guided distillation negative_prompt, prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_prompt_embeds, @@ -151,7 +151,7 @@ def __call__( bs = 
batch_size * num_images_per_prompt # get Guidance Scale Embedding - w = np.full(bs, guidance_scale) + w = np.full(bs, guidance_scale, dtype=prompt_embeds.dtype) w_embedding = self.get_guidance_scale_embedding(w, embedding_dim=256, dtype=prompt_embeds.dtype) # Adapted from diffusers to extend it for other runtimes than ORT @@ -162,12 +162,11 @@ def __call__( # predict the noise residual timestep = np.full(bs, t, dtype=timestep_dtype) noise_pred = self.unet( - sample=latent_model_input, + sample=latents, timestep=timestep, encoder_hidden_states=prompt_embeds, timestep_cond=w_embedding, - ) - noise_pred = noise_pred[0] + )[0] # compute the previous noisy sample x_t -> x_t-1 scheduler_output = self.scheduler.step(torch.from_numpy(noise_pred), t, torch.from_numpy(latents)) @@ -217,7 +216,7 @@ def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=None): Returns: `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` """ - w = w * 1000.0 + w = w * 1000 half_dim = embedding_dim // 2 emb = np.log(10000.0) / (half_dim - 1) emb = np.exp(np.arange(half_dim, dtype=dtype) * -emb) From 2d0142d581c1ccb7965a891077500fb518231892 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 19 Oct 2023 18:59:49 +0200 Subject: [PATCH 06/23] format --- optimum/pipelines/diffusers/pipeline_latent_consistency.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index 6566b66732..b38c80ddab 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import inspect import logging from typing import Callable, List, Optional, Union @@ -20,7 +19,6 @@ import torch from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput -from .pipeline_utils import DiffusionPipelineMixin, rescale_noise_cfg from .pipeline_stable_diffusion import StableDiffusionPipelineMixin From 6c54062fa0df512af89162cf21a0428431180f80 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 20 Oct 2023 10:05:53 +0200 Subject: [PATCH 07/23] format --- optimum/onnxruntime/modeling_diffusion.py | 5 ++--- optimum/utils/import_utils.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 24748c3b82..872821ece3 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -25,13 +25,12 @@ import torch from diffusers import ( DDIMScheduler, + LatentConsistencyModelPipeline, LMSDiscreteScheduler, PNDMScheduler, StableDiffusionPipeline, StableDiffusionXLImg2ImgPipeline, - LatentConsistencyModelPipeline, ) - from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from diffusers.utils import CONFIG_NAME, is_invisible_watermark_available from huggingface_hub import snapshot_download @@ -42,12 +41,12 @@ from ..exporters.onnx import main_export from ..onnx.utils import _get_external_data_paths +from ..pipelines.diffusers.pipeline_latent_consistency import LatentConsistencyModelPipelinePipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion import StableDiffusionPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_xl import StableDiffusionXLPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_xl_img2img import StableDiffusionXLImg2ImgPipelineMixin -from ..pipelines.diffusers.pipeline_latent_consistency import LatentConsistencyModelPipelinePipelineMixin from ..pipelines.diffusers.pipeline_utils import VaeImageProcessor from ..utils import ( DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER, diff --git a/optimum/utils/import_utils.py b/optimum/utils/import_utils.py index ed310a43ea..7905b3f107 100644 --- a/optimum/utils/import_utils.py +++ b/optimum/utils/import_utils.py @@ -34,7 +34,7 @@ TORCH_MINIMUM_VERSION = packaging.version.parse("1.11.0") TRANSFORMERS_MINIMUM_VERSION = packaging.version.parse("4.25.0") -DIFFUSERS_MINIMUM_VERSION = packaging.version.parse("0.18.0") +DIFFUSERS_MINIMUM_VERSION = packaging.version.parse("0.22.0") AUTOGPTQ_MINIMUM_VERSION = packaging.version.parse("0.4.2") From 74b02d92f5649b4a221424664475a7842448867d Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 20 Oct 2023 14:42:47 +0200 Subject: [PATCH 08/23] modify regex pattern --- optimum/exporters/onnx/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index 1e5704e893..0ac7b1304b 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -435,7 +435,7 @@ def ordered_inputs(self, model: Union["PreTrainedModel", "TFPreTrainedModel"]) - sig = inspect.signature(model.call) for param in sig.parameters: - param_regex = re.compile(rf"{param}(\.\d*)?") + param_regex = re.compile(rf"{param}(\.\d*)?$") to_insert = [] for name, dynamic_axes in inputs.items(): if re.match(param_regex, 
name): From ec1da5126154065af6cb779ff50de5f992aeaf97 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 20 Oct 2023 16:14:45 +0200 Subject: [PATCH 09/23] remove constraint diffusers version --- optimum/onnxruntime/modeling_diffusion.py | 2 -- optimum/utils/import_utils.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 872821ece3..364f091e52 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -25,7 +25,6 @@ import torch from diffusers import ( DDIMScheduler, - LatentConsistencyModelPipeline, LMSDiscreteScheduler, PNDMScheduler, StableDiffusionPipeline, @@ -572,7 +571,6 @@ class ORTLatentConsistencyModelPipeline(ORTStableDiffusionPipelineBase, LatentCo ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). """ - auto_model_class = LatentConsistencyModelPipeline __call__ = LatentConsistencyModelPipelinePipelineMixin.__call__ diff --git a/optimum/utils/import_utils.py b/optimum/utils/import_utils.py index 7905b3f107..ed310a43ea 100644 --- a/optimum/utils/import_utils.py +++ b/optimum/utils/import_utils.py @@ -34,7 +34,7 @@ TORCH_MINIMUM_VERSION = packaging.version.parse("1.11.0") TRANSFORMERS_MINIMUM_VERSION = packaging.version.parse("4.25.0") -DIFFUSERS_MINIMUM_VERSION = packaging.version.parse("0.22.0") +DIFFUSERS_MINIMUM_VERSION = packaging.version.parse("0.18.0") AUTOGPTQ_MINIMUM_VERSION = packaging.version.parse("0.4.2") From 510db7edd157481300f87c306ae045b2e6246fc2 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 20 Oct 2023 16:17:15 +0200 Subject: [PATCH 10/23] fix typo --- optimum/onnxruntime/modeling_diffusion.py | 6 +++--- optimum/pipelines/diffusers/pipeline_latent_consistency.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 364f091e52..ce51d6a8fc 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -40,7 +40,7 @@ from ..exporters.onnx import main_export from ..onnx.utils import _get_external_data_paths -from ..pipelines.diffusers.pipeline_latent_consistency import LatentConsistencyModelPipelinePipelineMixin +from ..pipelines.diffusers.pipeline_latent_consistency import LatentConsistencyPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion import StableDiffusionPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipelineMixin from ..pipelines.diffusers.pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipelineMixin @@ -566,12 +566,12 @@ class ORTStableDiffusionInpaintPipeline(ORTStableDiffusionPipelineBase, StableDi @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) -class ORTLatentConsistencyModelPipeline(ORTStableDiffusionPipelineBase, LatentConsistencyModelPipelinePipelineMixin): +class ORTLatentConsistencyModelPipeline(ORTStableDiffusionPipelineBase, LatentConsistencyPipelineMixin): """ ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). 
""" - __call__ = LatentConsistencyModelPipelinePipelineMixin.__call__ + __call__ = LatentConsistencyPipelineMixin.__call__ class ORTStableDiffusionXLPipelineBase(ORTStableDiffusionPipelineBase): diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index b38c80ddab..a8105e2921 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) -class LatentConsistencyModelPipelinePipelineMixin(StableDiffusionPipelineMixin): +class LatentConsistencyPipelineMixin(StableDiffusionPipelineMixin): # Adapted from https://github.com/huggingface/diffusers/blob/v0.22.0/src/diffusers/pipelines/latent_consistency/pipeline_latent_consistency.py#L264 def __call__( self, From d6cf152057e92e6c7330fbc89621ca931b4fcf3d Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 24 Oct 2023 11:35:11 +0200 Subject: [PATCH 11/23] fix regex --- optimum/exporters/onnx/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index 0ac7b1304b..b623d3bd22 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -435,7 +435,7 @@ def ordered_inputs(self, model: Union["PreTrainedModel", "TFPreTrainedModel"]) - sig = inspect.signature(model.call) for param in sig.parameters: - param_regex = re.compile(rf"{param}(\.\d*)?$") + param_regex = re.compile(rf"{param}(\..*)?$") to_insert = [] for name, dynamic_axes in inputs.items(): if re.match(param_regex, name): From af4f2e3fd6987e5283453e6aac9e52a1ac66f371 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 24 Oct 2023 16:59:41 +0200 Subject: [PATCH 12/23] fix pipeline --- .../diffusers/pipeline_latent_consistency.py | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index a8105e2921..7258ccb867 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -30,20 +30,19 @@ class LatentConsistencyPipelineMixin(StableDiffusionPipelineMixin): def __call__( self, prompt: Optional[Union[str, List[str]]] = None, - height: Optional[int] = 768, # TODO : default to None - width: Optional[int] = 768, + height: Optional[int] = None, + width: Optional[int] = None, num_inference_steps: int = 4, - guidance_scale: float = 7.5, + original_inference_steps: int = None, + guidance_scale: float = 8.5, num_images_per_prompt: int = 1, generator: Optional[np.random.RandomState] = None, latents: Optional[np.ndarray] = None, prompt_embeds: Optional[np.ndarray] = None, - lcm_origin_steps: int = 50, output_type: str = "pil", return_dict: bool = True, callback: Optional[Callable[[int, int, np.ndarray], None]] = None, callback_steps: int = 1, - guidance_rescale: float = 0.0, ): r""" Function invoked when calling the pipeline for generation. @@ -101,8 +100,8 @@ def __call__( list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" (nsfw) content, according to the `safety_checker`. 
""" - height = height or self.unet.config.get("sample_size", 64) * self.vae_scale_factor - width = width or self.unet.config.get("sample_size", 64) * self.vae_scale_factor + height = height or self.unet.config["sample_size"] * self.vae_scale_factor + width = width or self.unet.config["sample_size"]* self.vae_scale_factor # Don't need to get negative prompts due to LCM guided distillation negative_prompt = None @@ -127,19 +126,19 @@ def __call__( prompt_embeds = self._encode_prompt( prompt, num_images_per_prompt, - False, # Don't need to get negative prompts due to LCM guided distillation + False, negative_prompt, prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_prompt_embeds, ) # set timesteps - self.scheduler.set_timesteps(num_inference_steps, lcm_origin_steps) + self.scheduler.set_timesteps(num_inference_steps, original_inference_steps=original_inference_steps) timesteps = self.scheduler.timesteps latents = self.prepare_latents( batch_size * num_images_per_prompt, - self.unet.config.get("in_channels", 4), + self.unet.config["in_channels"], height, width, prompt_embeds.dtype, @@ -149,16 +148,15 @@ def __call__( bs = batch_size * num_images_per_prompt # get Guidance Scale Embedding - w = np.full(bs, guidance_scale, dtype=prompt_embeds.dtype) - w_embedding = self.get_guidance_scale_embedding(w, embedding_dim=256, dtype=prompt_embeds.dtype) + w = np.full(bs, guidance_scale - 1, dtype=prompt_embeds.dtype) + w_embedding = self.get_guidance_scale_embedding(w, embedding_dim=self.unet.config["time_cond_proj_dim"], dtype=prompt_embeds.dtype) # Adapted from diffusers to extend it for other runtimes than ORT timestep_dtype = self.unet.input_dtype.get("timestep", np.float32) num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order for i, t in enumerate(self.progress_bar(timesteps)): - # predict the noise residual - timestep = np.full(bs, t, dtype=timestep_dtype) + timestep = np.array([t], dtype=timestep_dtype) noise_pred = self.unet( sample=latents, timestep=timestep, @@ -169,6 +167,7 @@ def __call__( # compute the previous noisy sample x_t -> x_t-1 scheduler_output = self.scheduler.step(torch.from_numpy(noise_pred), t, torch.from_numpy(latents)) latents = scheduler_output.prev_sample.numpy() + denoised = scheduler_output.denoised.numpy() # call the callback, if provided if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): @@ -176,13 +175,13 @@ def __call__( callback(i, t, latents) if output_type == "latent": - image = latents + image = denoised has_nsfw_concept = None else: - latents /= self.vae_decoder.config.get("scaling_factor", 0.18215) + denoised /= self.vae_decoder.config["scaling_factor"] # it seems likes there is a strange result for using half-precision vae decoder if batchsize>1 image = np.concatenate( - [self.vae_decoder(latent_sample=latents[i : i + 1])[0] for i in range(latents.shape[0])] + [self.vae_decoder(latent_sample=denoised[i : i + 1])[0] for i in range(denoised.shape[0])] ) image, has_nsfw_concept = self.run_safety_checker(image) From a44ed5a9a468bca2730ac80e05555e124e8ba4da Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 24 Oct 2023 17:24:15 +0200 Subject: [PATCH 13/23] fix infered task --- optimum/exporters/tasks.py | 12 +++--------- .../diffusers/pipeline_latent_consistency.py | 5 ++--- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 7ff1649546..46ee59be34 100644 --- a/optimum/exporters/tasks.py +++ 
b/optimum/exporters/tasks.py @@ -187,7 +187,6 @@ class TasksManager: _DIFFUSERS_TASKS_TO_MODEL_LOADERS = { "stable-diffusion": "StableDiffusionPipeline", "stable-diffusion-xl": "StableDiffusionXLImg2ImgPipeline", - "stable-diffusion-latent-consistency": "LatentConsistencyModelPipeline", } _TIMM_TASKS_TO_MODEL_LOADERS = { @@ -1362,14 +1361,12 @@ def _infer_task_from_model_or_model_class( pt_auto_module = importlib.import_module("transformers.models.auto.modeling_auto") tf_auto_module = importlib.import_module("transformers.models.auto.modeling_tf_auto") - is_diffusers = model_class.config_name == "model_index.json" - for auto_cls_name, task in itertools.chain.from_iterable(iterable): if any( ( target_name.startswith("Auto"), target_name.startswith("TFAuto"), - is_diffusers, + "StableDiffusion" in target_name, ) ): if target_name == auto_cls_name: @@ -1411,11 +1408,8 @@ def _infer_task_from_model_name_or_path( ) model_info = huggingface_hub.model_info(model_name_or_path, revision=revision) if getattr(model_info, "library_name", None) == "diffusers": - # TODO : getattr(model_info, "model_index") defining auto_model_class_name currently set to None - for task in ("stable-diffusion-xl", "stable-diffusion", "stable-diffusion-latent-consistency"): - if task in model_info.tags: - inferred_task_name = task - break + class_name = model_info.config["diffusers"]["class_name"] + inferred_task_name = "stable-diffusion-xl" if "StableDiffusionXL" in class_name else "stable-diffusion" elif getattr(model_info, "library_name", None) == "timm": inferred_task_name = "image-classification" else: diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index 7258ccb867..8100981a8d 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -165,9 +165,8 @@ def __call__( )[0] # compute the previous noisy sample x_t -> x_t-1 - scheduler_output = self.scheduler.step(torch.from_numpy(noise_pred), t, torch.from_numpy(latents)) - latents = scheduler_output.prev_sample.numpy() - denoised = scheduler_output.denoised.numpy() + latents, denoised = self.scheduler.step(torch.from_numpy(noise_pred), t, torch.from_numpy(latents), return_dict=False) + latents, denoised = latents.numpy(), denoised.numpy() # call the callback, if provided if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): From ac878acb194467b4297d5d3fc87e8fbeed4da8b6 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 24 Oct 2023 17:26:03 +0200 Subject: [PATCH 14/23] style --- .../pipelines/diffusers/pipeline_latent_consistency.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/optimum/pipelines/diffusers/pipeline_latent_consistency.py b/optimum/pipelines/diffusers/pipeline_latent_consistency.py index 8100981a8d..41c85b5b6a 100644 --- a/optimum/pipelines/diffusers/pipeline_latent_consistency.py +++ b/optimum/pipelines/diffusers/pipeline_latent_consistency.py @@ -101,7 +101,7 @@ def __call__( (nsfw) content, according to the `safety_checker`. 
""" height = height or self.unet.config["sample_size"] * self.vae_scale_factor - width = width or self.unet.config["sample_size"]* self.vae_scale_factor + width = width or self.unet.config["sample_size"] * self.vae_scale_factor # Don't need to get negative prompts due to LCM guided distillation negative_prompt = None @@ -149,7 +149,9 @@ def __call__( bs = batch_size * num_images_per_prompt # get Guidance Scale Embedding w = np.full(bs, guidance_scale - 1, dtype=prompt_embeds.dtype) - w_embedding = self.get_guidance_scale_embedding(w, embedding_dim=self.unet.config["time_cond_proj_dim"], dtype=prompt_embeds.dtype) + w_embedding = self.get_guidance_scale_embedding( + w, embedding_dim=self.unet.config["time_cond_proj_dim"], dtype=prompt_embeds.dtype + ) # Adapted from diffusers to extend it for other runtimes than ORT timestep_dtype = self.unet.input_dtype.get("timestep", np.float32) @@ -165,7 +167,9 @@ def __call__( )[0] # compute the previous noisy sample x_t -> x_t-1 - latents, denoised = self.scheduler.step(torch.from_numpy(noise_pred), t, torch.from_numpy(latents), return_dict=False) + latents, denoised = self.scheduler.step( + torch.from_numpy(noise_pred), t, torch.from_numpy(latents), return_dict=False + ) latents, denoised = latents.numpy(), denoised.numpy() # call the callback, if provided From 46dc6537f4c0ab60f658dc4349db76267ef51b51 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 25 Oct 2023 11:02:12 +0200 Subject: [PATCH 15/23] add test --- .github/workflows/test_onnxruntime.yml | 1 + optimum/onnxruntime/__init__.py | 4 + optimum/utils/dummy_diffusers_objects.py | 13 ++++ .../test_stable_diffusion_pipeline.py | 75 +++++++++++++++++-- tests/onnxruntime/utils_onnxruntime_tests.py | 1 + 5 files changed, 89 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml index 261235e6be..4bf0bf3c2f 100644 --- a/.github/workflows/test_onnxruntime.yml +++ b/.github/workflows/test_onnxruntime.yml @@ -29,6 +29,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | + pip install git+https://github.com/huggingface/diffusers pip install .[tests,onnxruntime] - name: Test with pytest working-directory: tests diff --git a/optimum/onnxruntime/__init__.py b/optimum/onnxruntime/__init__.py index c6bb12916a..e89c1579b1 100644 --- a/optimum/onnxruntime/__init__.py +++ b/optimum/onnxruntime/__init__.py @@ -78,6 +78,7 @@ "ORTStableDiffusionInpaintPipeline", "ORTStableDiffusionXLPipeline", "ORTStableDiffusionXLImg2ImgPipeline", + "ORTLatentConsistencyModelPipeline", ] else: _import_structure["modeling_diffusion"] = [ @@ -86,6 +87,7 @@ "ORTStableDiffusionInpaintPipeline", "ORTStableDiffusionXLPipeline", "ORTStableDiffusionXLImg2ImgPipeline", + "ORTLatentConsistencyModelPipeline", ] @@ -139,6 +141,7 @@ ORTStableDiffusionInpaintPipeline, ORTStableDiffusionPipeline, ORTStableDiffusionXLImg2ImgPipeline, + ORTLatentConsistencyModelPipeline, ORTStableDiffusionXLPipeline, ) else: @@ -147,6 +150,7 @@ ORTStableDiffusionInpaintPipeline, ORTStableDiffusionPipeline, ORTStableDiffusionXLImg2ImgPipeline, + ORTLatentConsistencyModelPipeline, ORTStableDiffusionXLPipeline, ) else: diff --git a/optimum/utils/dummy_diffusers_objects.py b/optimum/utils/dummy_diffusers_objects.py index f85a0987d4..da6edb279a 100644 --- a/optimum/utils/dummy_diffusers_objects.py +++ b/optimum/utils/dummy_diffusers_objects.py @@ -68,3 +68,16 @@ def __init__(self, *args, **kwargs): @classmethod def from_pretrained(cls, *args, **kwargs): 
requires_backends(cls, ["diffusers"]) + + + +class ORTLatentConsistencyModelPipeline(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + diff --git a/tests/onnxruntime/test_stable_diffusion_pipeline.py b/tests/onnxruntime/test_stable_diffusion_pipeline.py index 6deb32bea3..c1223b6cc3 100644 --- a/tests/onnxruntime/test_stable_diffusion_pipeline.py +++ b/tests/onnxruntime/test_stable_diffusion_pipeline.py @@ -25,23 +25,32 @@ StableDiffusionPipeline, StableDiffusionXLPipeline, ) +from packaging.version import Version, parse +from optimum.utils.import_utils import _diffusers_version from diffusers.utils import load_image from diffusers.utils.testing_utils import floats_tensor from parameterized import parameterized from transformers.testing_utils import require_torch_gpu from utils_onnxruntime_tests import MODEL_NAMES, SEED, ORTModelTestMixin -from optimum.onnxruntime import ORTStableDiffusionPipeline +from optimum.onnxruntime import ( + ORTStableDiffusionPipeline, + ORTStableDiffusionImg2ImgPipeline, + ORTStableDiffusionInpaintPipeline, + ORTStableDiffusionXLImg2ImgPipeline, + ORTStableDiffusionXLPipeline, + ORTLatentConsistencyModelPipeline, +) + + from optimum.onnxruntime.modeling_diffusion import ( ORTModelTextEncoder, ORTModelUnet, ORTModelVaeDecoder, ORTModelVaeEncoder, - ORTStableDiffusionImg2ImgPipeline, - ORTStableDiffusionInpaintPipeline, - ORTStableDiffusionXLImg2ImgPipeline, - ORTStableDiffusionXLPipeline, ) + + from optimum.pipelines.diffusers.pipeline_utils import VaeImageProcessor from optimum.utils.testing_utils import grid_parameters, require_diffusers @@ -483,3 +492,59 @@ def test_vae_image_processor_pil(self): in_np = np.array(i) out_np = to_np(out) if output_type == "pil" else (to_np(out) * 255).round() self.assertTrue(np.allclose(in_np, out_np, atol=1e-6)) + + +@unittest.skipIf(parse(_diffusers_version) <= Version("0.21.4"), "not supported with this diffusers version") +class ORTLatentConsistencyModelPipelineTest(ORTModelTestMixin): + SUPPORTED_ARCHITECTURES = [ + "latent-consistency", + ] + ORTMODEL_CLASS = ORTLatentConsistencyModelPipeline + TASK = "text-to-image" + + @parameterized.expand(SUPPORTED_ARCHITECTURES) + @require_diffusers + def test_compare_to_diffusers(self, model_arch: str): + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True) + self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder) + self.assertIsInstance(ort_pipeline.vae_decoder, ORTModelVaeDecoder) + self.assertIsInstance(ort_pipeline.vae_encoder, ORTModelVaeEncoder) + self.assertIsInstance(ort_pipeline.unet, ORTModelUnet) + self.assertIsInstance(ort_pipeline.config, Dict) + + + from diffusers import LatentConsistencyModelPipeline + + pipeline = LatentConsistencyModelPipeline.from_pretrained(MODEL_NAMES[model_arch]) + batch_size, num_images_per_prompt, height, width = 2, 2, 64, 32 + latents = ort_pipeline.prepare_latents( + batch_size * num_images_per_prompt, + ort_pipeline.unet.config["in_channels"], + height, + width, + dtype=np.float32, + generator=np.random.RandomState(0), + ) + + kwargs = { + "prompt": ["sailing ship in storm by Leonardo da Vinci"] * batch_size, + "num_inference_steps": 1, + "num_images_per_prompt": num_images_per_prompt, + "height": height, + "width": width, + "guidance_scale": 8.5, + } + + for output_type in ["latent", "np"]: + ort_outputs = 
ort_pipeline(latents=latents, output_type=output_type, **kwargs).images + self.assertIsInstance(ort_outputs, np.ndarray) + with torch.no_grad(): + outputs = pipeline(latents=torch.from_numpy(latents), output_type=output_type, **kwargs).images + + # Compare model outputs + self.assertTrue(np.allclose(ort_outputs, outputs, atol=1e-4)) + # Compare model devices + self.assertEqual(pipeline.device, ort_pipeline.device) + + + diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index f8a1c36e2b..654e63c639 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -68,6 +68,7 @@ "hubert": "hf-internal-testing/tiny-random-HubertModel", "ibert": "hf-internal-testing/tiny-random-IBertModel", "levit": "hf-internal-testing/tiny-random-LevitModel", + "latent-consistency": "echarlaix/tiny-random-latent-consistency", "layoutlm": "hf-internal-testing/tiny-random-LayoutLMModel", "layoutlmv3": "hf-internal-testing/tiny-random-LayoutLMv3Model", "longt5": "hf-internal-testing/tiny-random-LongT5Model", From 4180d1b1364b892c8e44677be5e18b59cdcdbc95 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 25 Oct 2023 11:02:46 +0200 Subject: [PATCH 16/23] fix style --- optimum/utils/dummy_diffusers_objects.py | 2 -- tests/onnxruntime/test_stable_diffusion_pipeline.py | 4 ---- 2 files changed, 6 deletions(-) diff --git a/optimum/utils/dummy_diffusers_objects.py b/optimum/utils/dummy_diffusers_objects.py index da6edb279a..f6914bbcd3 100644 --- a/optimum/utils/dummy_diffusers_objects.py +++ b/optimum/utils/dummy_diffusers_objects.py @@ -70,7 +70,6 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["diffusers"]) - class ORTLatentConsistencyModelPipeline(metaclass=DummyObject): _backends = ["diffusers"] @@ -80,4 +79,3 @@ def __init__(self, *args, **kwargs): @classmethod def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["diffusers"]) - diff --git a/tests/onnxruntime/test_stable_diffusion_pipeline.py b/tests/onnxruntime/test_stable_diffusion_pipeline.py index c1223b6cc3..d0861544b3 100644 --- a/tests/onnxruntime/test_stable_diffusion_pipeline.py +++ b/tests/onnxruntime/test_stable_diffusion_pipeline.py @@ -512,7 +512,6 @@ def test_compare_to_diffusers(self, model_arch: str): self.assertIsInstance(ort_pipeline.unet, ORTModelUnet) self.assertIsInstance(ort_pipeline.config, Dict) - from diffusers import LatentConsistencyModelPipeline pipeline = LatentConsistencyModelPipeline.from_pretrained(MODEL_NAMES[model_arch]) @@ -545,6 +544,3 @@ def test_compare_to_diffusers(self, model_arch: str): self.assertTrue(np.allclose(ort_outputs, outputs, atol=1e-4)) # Compare model devices self.assertEqual(pipeline.device, ort_pipeline.device) - - - From 8d4069c8645f68f7c5c1ea4c0585bac7b9875b80 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 25 Oct 2023 11:04:11 +0200 Subject: [PATCH 17/23] fix style --- optimum/onnxruntime/__init__.py | 4 ++-- tests/onnxruntime/test_stable_diffusion_pipeline.py | 12 ++++-------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/optimum/onnxruntime/__init__.py b/optimum/onnxruntime/__init__.py index e89c1579b1..f1d4f63a9f 100644 --- a/optimum/onnxruntime/__init__.py +++ b/optimum/onnxruntime/__init__.py @@ -137,20 +137,20 @@ raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: from ..utils.dummy_diffusers_objects import ( + ORTLatentConsistencyModelPipeline, ORTStableDiffusionImg2ImgPipeline, 
ORTStableDiffusionInpaintPipeline, ORTStableDiffusionPipeline, ORTStableDiffusionXLImg2ImgPipeline, - ORTLatentConsistencyModelPipeline, ORTStableDiffusionXLPipeline, ) else: from .modeling_diffusion import ( + ORTLatentConsistencyModelPipeline, ORTStableDiffusionImg2ImgPipeline, ORTStableDiffusionInpaintPipeline, ORTStableDiffusionPipeline, ORTStableDiffusionXLImg2ImgPipeline, - ORTLatentConsistencyModelPipeline, ORTStableDiffusionXLPipeline, ) else: diff --git a/tests/onnxruntime/test_stable_diffusion_pipeline.py b/tests/onnxruntime/test_stable_diffusion_pipeline.py index d0861544b3..cdd071e649 100644 --- a/tests/onnxruntime/test_stable_diffusion_pipeline.py +++ b/tests/onnxruntime/test_stable_diffusion_pipeline.py @@ -25,33 +25,29 @@ StableDiffusionPipeline, StableDiffusionXLPipeline, ) -from packaging.version import Version, parse -from optimum.utils.import_utils import _diffusers_version from diffusers.utils import load_image from diffusers.utils.testing_utils import floats_tensor +from packaging.version import Version, parse from parameterized import parameterized from transformers.testing_utils import require_torch_gpu from utils_onnxruntime_tests import MODEL_NAMES, SEED, ORTModelTestMixin from optimum.onnxruntime import ( - ORTStableDiffusionPipeline, + ORTLatentConsistencyModelPipeline, ORTStableDiffusionImg2ImgPipeline, ORTStableDiffusionInpaintPipeline, + ORTStableDiffusionPipeline, ORTStableDiffusionXLImg2ImgPipeline, ORTStableDiffusionXLPipeline, - ORTLatentConsistencyModelPipeline, ) - - from optimum.onnxruntime.modeling_diffusion import ( ORTModelTextEncoder, ORTModelUnet, ORTModelVaeDecoder, ORTModelVaeEncoder, ) - - from optimum.pipelines.diffusers.pipeline_utils import VaeImageProcessor +from optimum.utils.import_utils import _diffusers_version from optimum.utils.testing_utils import grid_parameters, require_diffusers From 824fc5702532eed226c139fdeba897be9cb7be16 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 25 Oct 2023 13:12:54 +0200 Subject: [PATCH 18/23] add documentation --- .../package_reference/modeling_ort.mdx | 5 +++++ docs/source/onnxruntime/usage_guides/models.mdx | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/docs/source/onnxruntime/package_reference/modeling_ort.mdx b/docs/source/onnxruntime/package_reference/modeling_ort.mdx index 47c961791c..65b2b60195 100644 --- a/docs/source/onnxruntime/package_reference/modeling_ort.mdx +++ b/docs/source/onnxruntime/package_reference/modeling_ort.mdx @@ -143,3 +143,8 @@ The following ORT classes are available for the following custom tasks. 
 [[autodoc]] onnxruntime.ORTStableDiffusionXLImg2ImgPipeline
     - __call__
+
+#### ORTLatentConsistencyModelPipeline
+
+[[autodoc]] onnxruntime.ORTLatentConsistencyModelPipeline
+    - __call__
diff --git a/docs/source/onnxruntime/usage_guides/models.mdx b/docs/source/onnxruntime/usage_guides/models.mdx
index b7e8549e8e..131822e956 100644
--- a/docs/source/onnxruntime/usage_guides/models.mdx
+++ b/docs/source/onnxruntime/usage_guides/models.mdx
@@ -248,3 +248,19 @@ image = refiner(prompt=prompt, image=image[None, :]).images[0]
 image.save("sailing_ship.png")
 ```
+
+
+## Latent Consistency Models
+
+### Text-to-Image
+
+Here is an example of how you can load a Latent Consistency Model (LCM) from [SimianLuo/LCM_Dreamshaper_v7](https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7) and run inference using ONNX Runtime:
+
+```python
+from optimum.onnxruntime import ORTLatentConsistencyModelPipeline
+
+model_id = "SimianLuo/LCM_Dreamshaper_v7"
+pipeline = ORTLatentConsistencyModelPipeline.from_pretrained(model_id, export=True)
+prompt = "sailing ship in storm by Leonardo da Vinci"
+images = pipeline(prompt, num_inference_steps=4, guidance_scale=8.0).images
+```

From 683b39c049ba5145f1629e40ea640b1c719b4c37 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 25 Oct 2023 15:23:55 +0200
Subject: [PATCH 19/23] fix

---
 tests/onnxruntime/test_stable_diffusion_pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/onnxruntime/test_stable_diffusion_pipeline.py b/tests/onnxruntime/test_stable_diffusion_pipeline.py
index cdd071e649..40c00a4685 100644
--- a/tests/onnxruntime/test_stable_diffusion_pipeline.py
+++ b/tests/onnxruntime/test_stable_diffusion_pipeline.py
@@ -490,7 +490,6 @@ def test_vae_image_processor_pil(self):
             self.assertTrue(np.allclose(in_np, out_np, atol=1e-6))
 
 
-@unittest.skipIf(parse(_diffusers_version) <= Version("0.21.4"), "not supported with this diffusers version")
 class ORTLatentConsistencyModelPipelineTest(ORTModelTestMixin):
     SUPPORTED_ARCHITECTURES = [
         "latent-consistency",
@@ -500,6 +499,7 @@ class ORTLatentConsistencyModelPipelineTest(ORTModelTestMixin):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_diffusers
+    @unittest.skipIf(parse(_diffusers_version) <= Version("0.21.4"), "not supported with this diffusers version")
     def test_compare_to_diffusers(self, model_arch: str):
         ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True)
         self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder)

From 1604240a1b40bdb78a7e3b193ed2adc9d9727507 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 30 Oct 2023 13:47:30 +0100
Subject: [PATCH 20/23] add precision for diffusers min version

---
 tests/onnxruntime/test_stable_diffusion_pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/onnxruntime/test_stable_diffusion_pipeline.py b/tests/onnxruntime/test_stable_diffusion_pipeline.py
index 40c00a4685..c0c25ae279 100644
--- a/tests/onnxruntime/test_stable_diffusion_pipeline.py
+++ b/tests/onnxruntime/test_stable_diffusion_pipeline.py
@@ -499,7 +499,7 @@ class ORTLatentConsistencyModelPipelineTest(ORTModelTestMixin):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_diffusers
-    @unittest.skipIf(parse(_diffusers_version) <= Version("0.21.4"), "not supported with this diffusers version")
+    @unittest.skipIf(parse(_diffusers_version) <= Version("0.21.4"), "not supported with this diffusers version, needs diffusers>=v0.22.0")
     def test_compare_to_diffusers(self, model_arch: str):
        ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True)
         self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder)

From 583ba148e0c64f08409257aa25dfc9e36bdc3c12 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 30 Oct 2023 13:51:54 +0100
Subject: [PATCH 21/23] move import

---
 tests/onnxruntime/test_stable_diffusion_pipeline.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tests/onnxruntime/test_stable_diffusion_pipeline.py b/tests/onnxruntime/test_stable_diffusion_pipeline.py
index c0c25ae279..3b89d5949c 100644
--- a/tests/onnxruntime/test_stable_diffusion_pipeline.py
+++ b/tests/onnxruntime/test_stable_diffusion_pipeline.py
@@ -50,6 +50,9 @@
 from optimum.utils.import_utils import _diffusers_version
 from optimum.utils.testing_utils import grid_parameters, require_diffusers
 
+if parse(_diffusers_version) > Version("0.21.4"):
+    from diffusers import LatentConsistencyModelPipeline
+
 
 def _generate_inputs(batch_size=1):
     inputs = {
@@ -499,7 +502,10 @@ class ORTLatentConsistencyModelPipelineTest(ORTModelTestMixin):
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     @require_diffusers
-    @unittest.skipIf(parse(_diffusers_version) <= Version("0.21.4"), "not supported with this diffusers version, needs diffusers>=v0.22.0")
+    @unittest.skipIf(
+        parse(_diffusers_version) <= Version("0.21.4"),
+        "not supported with this diffusers version, needs diffusers>=v0.22.0",
+    )
     def test_compare_to_diffusers(self, model_arch: str):
         ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], export=True)
         self.assertIsInstance(ort_pipeline.text_encoder, ORTModelTextEncoder)
@@ -508,8 +514,6 @@ def test_compare_to_diffusers(self, model_arch: str):
         self.assertIsInstance(ort_pipeline.unet, ORTModelUnet)
         self.assertIsInstance(ort_pipeline.config, Dict)
 
-        from diffusers import LatentConsistencyModelPipeline
-
         pipeline = LatentConsistencyModelPipeline.from_pretrained(MODEL_NAMES[model_arch])
         batch_size, num_images_per_prompt, height, width = 2, 2, 64, 32
         latents = ort_pipeline.prepare_latents(

From f96881034e9dbf53330915dc74da831c763112dc Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 30 Oct 2023 13:52:07 +0100
Subject: [PATCH 22/23] rm install from source

---
 .github/workflows/test_onnxruntime.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml
index 4bf0bf3c2f..261235e6be 100644
--- a/.github/workflows/test_onnxruntime.yml
+++ b/.github/workflows/test_onnxruntime.yml
@@ -29,7 +29,6 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
         run: |
-          pip install git+https://github.com/huggingface/diffusers
           pip install .[tests,onnxruntime]
       - name: Test with pytest
         working-directory: tests

From d723e4b2a41006b403c1113bc9937e7be8dd5fd4 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 30 Oct 2023 13:52:33 +0100
Subject: [PATCH 23/23] format

---
 tests/onnxruntime/test_stable_diffusion_pipeline.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/onnxruntime/test_stable_diffusion_pipeline.py b/tests/onnxruntime/test_stable_diffusion_pipeline.py
index 3b89d5949c..0f166af290 100644
--- a/tests/onnxruntime/test_stable_diffusion_pipeline.py
+++ b/tests/onnxruntime/test_stable_diffusion_pipeline.py
@@ -50,6 +50,7 @@
 from optimum.utils.import_utils import _diffusers_version
 from optimum.utils.testing_utils import grid_parameters, require_diffusers
 
+
 if parse(_diffusers_version) > Version("0.21.4"):
     from diffusers import LatentConsistencyModelPipeline
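
Taken together, patches 19-23 converge on one pattern for handling the new pipeline class: `LatentConsistencyModelPipeline` only exists in diffusers v0.22.0 and later, so every import of it is gated on the installed diffusers version instead of pinning CI to a source install of diffusers. Below is a minimal sketch of that guard pattern as downstream code might reuse it; the `ImportError` fallback is an illustrative assumption of this sketch, not part of the patches (the test suite simply skips the affected tests via `unittest.skipIf`):

```python
# Sketch of the version-guard pattern the patches above converge on:
# LatentConsistencyModelPipeline is only available in diffusers >= 0.22.0,
# so gate the import on the installed diffusers version.
from packaging.version import Version, parse

from optimum.utils.import_utils import _diffusers_version

if parse(_diffusers_version) > Version("0.21.4"):
    from diffusers import LatentConsistencyModelPipeline
else:
    # Hypothetical fallback for downstream code; the patches instead mark
    # the LCM tests as skipped on older diffusers releases.
    raise ImportError("LatentConsistencyModelPipeline requires diffusers>=0.22.0")
```

Using `<= Version("0.21.4")` for the skip condition and the strict `>` for the import keeps both sides of the guard consistent with a single boundary version.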