From 8da8ed043e517171dfd766d977bfc8a36b2d4fa0 Mon Sep 17 00:00:00 2001
From: Rick Staa <rick.staa@outlook.com>
Date: Tue, 30 Jul 2024 12:41:00 +0200
Subject: [PATCH] feat(model): add Realistic Vision model T2I support (#136)

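This commit adds support for the Realistic Vision V6.0 B1 (noVAE) model,
https://huggingface.co/SG161222/Realistic_Vision_V6.0_B1_noVAE, to the
text-to-image (T2I) pipeline. For this model the pipeline explicitly loads the
stabilityai/sd-vae-ft-ema VAE, and the checkpoint is added to the model
download script (runner/dl_checkpoints.sh).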
---
 runner/app/pipelines/text_to_image.py | 7 +++++++
 runner/dl_checkpoints.sh              | 1 +
 2 files changed, 8 insertions(+)

diff --git a/runner/app/pipelines/text_to_image.py b/runner/app/pipelines/text_to_image.py
index e2d6c692..0dc54278 100644
--- a/runner/app/pipelines/text_to_image.py
+++ b/runner/app/pipelines/text_to_image.py
@@ -21,6 +21,7 @@
     StableDiffusionXLPipeline,
     UNet2DConditionModel,
 )
+from diffusers.models import AutoencoderKL
 from huggingface_hub import file_download, hf_hub_download
 from safetensors.torch import load_file
 
@@ -32,6 +33,7 @@ class ModelName(Enum):
 
     SDXL_LIGHTNING = "ByteDance/SDXL-Lightning"
     SD3_MEDIUM = "stabilityai/stable-diffusion-3-medium-diffusers"
+    REALISTIC_VISION_V6 = "SG161222/Realistic_Vision_V6.0_B1_noVAE"
 
     @classmethod
     def list(cls):
@@ -69,6 +71,11 @@ def __init__(self, model_id: str):
         if os.environ.get("BFLOAT16"):
             logger.info("TextToImagePipeline using bfloat16 precision for %s", model_id)
             kwargs["torch_dtype"] = torch.bfloat16
+        
+        # Load VAE for specific models.
+        if ModelName.REALISTIC_VISION_V6.value in model_id:
+            vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-ema")
+            kwargs["vae"] = vae
 
         # Special case SDXL-Lightning because the unet for SDXL needs to be swapped
         if ModelName.SDXL_LIGHTNING.value in model_id:
diff --git a/runner/dl_checkpoints.sh b/runner/dl_checkpoints.sh
index 822590d4..9fe40837 100755
--- a/runner/dl_checkpoints.sh
+++ b/runner/dl_checkpoints.sh
@@ -56,6 +56,7 @@ function download_all_models() {
     huggingface-cli download prompthero/openjourney-v4 --include "*.safetensors" "*.json" "*.txt" --exclude ".onnx" ".onnx_data" --cache-dir models
     huggingface-cli download SG161222/RealVisXL_V4.0 --include "*.fp16.safetensors" "*.json" "*.txt" --exclude ".onnx" ".onnx_data" --cache-dir models
     huggingface-cli download stabilityai/stable-diffusion-3-medium-diffusers --include "*.fp16*.safetensors" "*.model" "*.json" "*.txt" --cache-dir models ${TOKEN_FLAG:+"$TOKEN_FLAG"}
+    huggingface-cli download SG161222/Realistic_Vision_V6.0_B1_noVAE --include "*.fp16.safetensors" "*.json" "*.txt" "*.bin" --exclude ".onnx" ".onnx_data" --cache-dir models
 
     # Download image-to-video models.
     huggingface-cli download stabilityai/stable-video-diffusion-img2vid-xt --include "*.fp16.safetensors" "*.json" --cache-dir models