From 77c1c145aef430eb46f0aa1b36213358e751b1eb Mon Sep 17 00:00:00 2001
From: Marco van Dijk
Date: Thu, 11 Apr 2024 21:52:06 +0200
Subject: [PATCH 1/5] Add support for the `timbrooks/instruct-pix2pix` model

---
 runner/app/pipelines/image_to_image.py | 6 ++++++
 runner/dl_checkpoints.sh               | 4 ++++
 2 files changed, 10 insertions(+)

diff --git a/runner/app/pipelines/image_to_image.py b/runner/app/pipelines/image_to_image.py
index efa4a958..69fd0456 100644
--- a/runner/app/pipelines/image_to_image.py
+++ b/runner/app/pipelines/image_to_image.py
@@ -3,6 +3,7 @@
 from diffusers import (
     AutoPipelineForImage2Image,
+    StableDiffusionInstructPix2PixPipeline,
     StableDiffusionXLPipeline,
     UNet2DConditionModel,
     EulerDiscreteScheduler,
@@ -22,6 +23,7 @@
 logger = logging.getLogger(__name__)
 
 SDXL_LIGHTNING_MODEL_ID = "ByteDance/SDXL-Lightning"
+PIX2PIX_MODEL_ID = "timbrooks/instruct-pix2pix"
 
 
 class ImageToImagePipeline(Pipeline):
@@ -87,6 +89,10 @@ def __init__(self, model_id: str):
             self.ldm.scheduler = EulerDiscreteScheduler.from_config(
                 self.ldm.scheduler.config, timestep_spacing="trailing"
             )
+        elif PIX2PIX_MODEL_ID in model_id:
+            self.ldm = StableDiffusionInstructPix2PixPipeline.from_pretrained(
+                model_id, **kwargs
+            ).to(torch_device)
         else:
             self.ldm = AutoPipelineForImage2Image.from_pretrained(
                 model_id, **kwargs
diff --git a/runner/dl_checkpoints.sh b/runner/dl_checkpoints.sh
index 33751bad..c922522c 100755
--- a/runner/dl_checkpoints.sh
+++ b/runner/dl_checkpoints.sh
@@ -60,6 +60,7 @@ if [ "$MODE" = "alpha" ]; then
     # Download text-to-image and image-to-image models.
     huggingface-cli download ByteDance/SDXL-Lightning --include "*unet.safetensors" --exclude "*lora.safetensors*" --cache-dir models
+    huggingface-cli download timbrooks/instruct-pix2pix --include "*fp16.safetensors" --exclude "*lora.safetensors*" --cache-dir models
 
     # Download image-to-video models (token-gated).
     printf "\nDownloading token-gated models...\n"
@@ -80,6 +81,9 @@ else
     # Download image-to-video models.
     huggingface-cli download stabilityai/stable-video-diffusion-img2vid-xt --include "*.fp16.safetensors" "*.json" --cache-dir models
 
+    # Download text-to-video models.
+    huggingface-cli download ali-vilab/text-to-video-ms-1.7b --include "*.fp16.safetensors" "*.json" --cache-dir models
+
     # Download image-to-video models (token-gated).
     printf "\nDownloading token-gated models...\n"
     check_hf_auth

From c648ea2e3aa08d901a7b30c178004f75aaff5457 Mon Sep 17 00:00:00 2001
From: Marco van Dijk
Date: Thu, 11 Apr 2024 22:33:55 +0200
Subject: [PATCH 2/5] Remove text-to-vid model

---
 runner/dl_checkpoints.sh | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/runner/dl_checkpoints.sh b/runner/dl_checkpoints.sh
index c922522c..2852d1b8 100755
--- a/runner/dl_checkpoints.sh
+++ b/runner/dl_checkpoints.sh
@@ -81,9 +81,6 @@ else
     # Download image-to-video models.
     huggingface-cli download stabilityai/stable-video-diffusion-img2vid-xt --include "*.fp16.safetensors" "*.json" --cache-dir models
 
-    # Download text-to-video models.
-    huggingface-cli download ali-vilab/text-to-video-ms-1.7b --include "*.fp16.safetensors" "*.json" --cache-dir models
-
     # Download image-to-video models (token-gated).
printf "\nDownloading token-gated models...\n" check_hf_auth From 8fa4d91ca332b5b5fcc8176ebd074ee243b31910 Mon Sep 17 00:00:00 2001 From: Marco van Dijk Date: Sun, 14 Apr 2024 11:48:56 +0200 Subject: [PATCH 3/5] Tweak pix2pix --- runner/app/pipelines/image_to_image.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/runner/app/pipelines/image_to_image.py b/runner/app/pipelines/image_to_image.py index 69fd0456..1a279ceb 100644 --- a/runner/app/pipelines/image_to_image.py +++ b/runner/app/pipelines/image_to_image.py @@ -12,6 +12,7 @@ from huggingface_hub import file_download, hf_hub_download import torch import PIL +import random from typing import List import logging import os @@ -90,6 +91,8 @@ def __init__(self, model_id: str): self.ldm.scheduler.config, timestep_spacing="trailing" ) elif PIX2PIX_MODEL_ID in model_id: + kwargs["torch_dtype"] = torch.float16 + kwargs["variant"] = "fp16" self.ldm = StableDiffusionInstructPix2PixPipeline.from_pretrained( model_id, **kwargs ).to(torch_device) @@ -148,6 +151,10 @@ def __call__(self, prompt: str, image: PIL.Image, **kwargs) -> List[PIL.Image]: else: # Default to 2step kwargs["num_inference_steps"] = 2 + elif PIX2PIX_MODEL_ID in self.model_id: + kwargs["guidance_scale"] = round(random.uniform(6.0, 9.0), ndigits=2) + kwargs["image_guidance_scale"] = round(random.uniform(1.2, 1.8), ndigits=2) + kwargs["num_inference_steps"] = 50 return self.ldm(prompt, image=image, **kwargs).images From 7931c0300f630c34a5bf1e72b439255eb814cf8f Mon Sep 17 00:00:00 2001 From: Marco van Dijk Date: Sun, 14 Apr 2024 11:51:31 +0200 Subject: [PATCH 4/5] Do not randomize guidance scale --- runner/app/pipelines/image_to_image.py | 1 - 1 file changed, 1 deletion(-) diff --git a/runner/app/pipelines/image_to_image.py b/runner/app/pipelines/image_to_image.py index 1a279ceb..5f6e8b6f 100644 --- a/runner/app/pipelines/image_to_image.py +++ b/runner/app/pipelines/image_to_image.py @@ -152,7 +152,6 @@ def __call__(self, prompt: str, image: PIL.Image, **kwargs) -> List[PIL.Image]: # Default to 2step kwargs["num_inference_steps"] = 2 elif PIX2PIX_MODEL_ID in self.model_id: - kwargs["guidance_scale"] = round(random.uniform(6.0, 9.0), ndigits=2) kwargs["image_guidance_scale"] = round(random.uniform(1.2, 1.8), ndigits=2) kwargs["num_inference_steps"] = 50 From 76d59bb9fb26ac9517764bba866becf8f226c74c Mon Sep 17 00:00:00 2001 From: Marco van Dijk Date: Mon, 15 Apr 2024 20:11:35 +0200 Subject: [PATCH 5/5] Make image guidance scale a param --- runner/app/pipelines/image_to_image.py | 6 ++-- runner/app/routes/image_to_image.py | 2 ++ runner/openapi.json | 5 ++++ worker/runner.gen.go | 39 +++++++++++++------------- 4 files changed, 31 insertions(+), 21 deletions(-) diff --git a/runner/app/pipelines/image_to_image.py b/runner/app/pipelines/image_to_image.py index 5f6e8b6f..4f9d348e 100644 --- a/runner/app/pipelines/image_to_image.py +++ b/runner/app/pipelines/image_to_image.py @@ -152,8 +152,10 @@ def __call__(self, prompt: str, image: PIL.Image, **kwargs) -> List[PIL.Image]: # Default to 2step kwargs["num_inference_steps"] = 2 elif PIX2PIX_MODEL_ID in self.model_id: - kwargs["image_guidance_scale"] = round(random.uniform(1.2, 1.8), ndigits=2) - kwargs["num_inference_steps"] = 50 + if "image_guidance_scale" not in kwargs: + kwargs["image_guidance_scale"] = round(random.uniform(1.2, 1.8), ndigits=2) + if "num_inference_steps" not in kwargs: + kwargs["num_inference_steps"] = 50 return self.ldm(prompt, image=image, **kwargs).images diff --git a/runner/app/routes/image_to_image.py 
b/runner/app/routes/image_to_image.py index 6af61bca..2a4f60af 100644 --- a/runner/app/routes/image_to_image.py +++ b/runner/app/routes/image_to_image.py @@ -36,6 +36,7 @@ async def image_to_image( model_id: Annotated[str, Form()] = "", strength: Annotated[float, Form()] = 0.8, guidance_scale: Annotated[float, Form()] = 7.5, + image_guidance_scale: Annotated[float, Form()] = 0, negative_prompt: Annotated[str, Form()] = "", seed: Annotated[int, Form()] = None, num_images_per_prompt: Annotated[int, Form()] = 1, @@ -80,6 +81,7 @@ async def image_to_image( image=image, strength=strength, guidance_scale=guidance_scale, + image_guidance_scale=image_guidance_scale, negative_prompt=negative_prompt, seed=seed, num_images_per_prompt=num_images_per_prompt, diff --git a/runner/openapi.json b/runner/openapi.json index 57dcc4a0..62270e7a 100644 --- a/runner/openapi.json +++ b/runner/openapi.json @@ -255,6 +255,11 @@ "title": "Guidance Scale", "default": 7.5 }, + "image_guidance_scale": { + "type": "number", + "title": "Guidance Scale", + "default": 7.5 + }, "negative_prompt": { "type": "string", "title": "Negative Prompt", diff --git a/worker/runner.gen.go b/worker/runner.gen.go index 9e2e74b3..2fb431a0 100644 --- a/worker/runner.gen.go +++ b/worker/runner.gen.go @@ -35,6 +35,7 @@ type APIError struct { type BodyImageToImageImageToImagePost struct { GuidanceScale *float32 `json:"guidance_scale,omitempty"` Image openapi_types.File `json:"image"` + ImageGuidanceScale *float32 `json:"image_guidance_scale,omitempty"` ModelId *string `json:"model_id,omitempty"` NegativePrompt *string `json:"negative_prompt,omitempty"` NumImagesPerPrompt *int `json:"num_images_per_prompt,omitempty"` @@ -1077,25 +1078,25 @@ func HandlerWithOptions(si ServerInterface, options ChiServerOptions) http.Handl // Base64 encoded, gzipped, json marshaled Swagger object var swaggerSpec = []string{ - "H4sIAAAAAAAC/+xXS2/jNhD+KwTbozd23E1T+Jb0tUabbhC720MQGIw0lrkrkSw5TNcI/N8LkrZEvSqn", - "yKZAkZNew5lvZr556JEmslBSgEBDZ4/UJBsomL+9uJ7/qLXU7l5pqUAjB/+lMJm7IMcc6IxemYyOKG6V", - "ezCoucjobjeiGv60XENKZ7f+yN2oPFLqLs/J+4+QIN2N6KVMtytesAxWKPc3jUclDbZhZZanTCSwMglz", - "Vh5pCmtmc6Sz85OzyvjPezmy8HIlBGGLe9AOgrfiFKylLhjSGb3nguktrZTMvUjL7REtZAr5iqc1+zQ6", - "eeUEyDztOiwgY8gfYKW0LBT26vhtL0eug1yXKluEaJmVAt2l8DTSZwviPTLkGnRLKxcIWQhNpedwth+C", - "AUhjyYV77lJqUIPIcFODNzn5rgK4OEi0stUgmjqgCTmMOHcsrwYp+cBTkM3HbkqulanzsILzkzKdsdgA", - "zzb1RJ2df1udexe+dx39z2hbSORSrO5t8gmwqeR0eh5rcZLk0kvWtEV+CMkNrJjNVj3EmEwj6jphcmEz", - "0s+RJ1DxL542zJ1Opm8rc3/47+2TDRoOsK+fQh3se7dcXvd04hSQ8dzdfa1hTWf0q3HVz8f7Zj4uu20T", - "5f54BLOy1QPkA8t5ylwSByFxhMIMYWvq21VYfgiaSiBMa7b1PsRomwq6cAPLcfP9BpJPbbwGGdp6ldL3", - "v9C49XiBrglX1WRloMO+L7obMEoKA20EoUsfHbErSDmL4xQad1ecWow0ca7rsDpwB0vtiB1bS1bnsdzv", - "Oh/cE6yX8RYipAFIB8IlfMal9I5cM81C8L7UVlB15iN68esa8PQ1oOy9T2y2ezARYdq86CDPYCvLZVKr", - "Sia279d0dvvY8vGxBfEuKtBfZeLNtEp01FqlwZieAR1eVKIeM1m6t0NF5fwIpvaSUaSOaJ8f3HTqb19r", - "zYpG+3piH2vEpNyQguKBvrY3H7tUw9tyyDMysZrjduGgBOxulFwC06DL3yB36D68KpVsEBXdOR1crGWo", - "I5Nornx+Z/RCEKZUzkPCCUqirSAXc6K4gpyL4M+BF/wBFIB232+sEN7QA2gTdE1OTk8mLiBSgWCK0xn9", - "xr8aUcVw42GPN370+EYHvh5darzxeVpOJupCFuLhT00nE3dJpEAQ/lQEevzROPOHf8GhNMazzwemHpCF", - "TRIwZm1zUqbEp8AWhVtNS4ju5dh3qjco35Sr7GGvrrvlK3tf4DTwAQy6HavhV2Fz5IppHLud+E3KkB3v", - "2rF/DLs6J1Fb2H3BiNfn9rExH9G3z5n1ck/ssH/JUnITUuLtTqfPare1MrYRVCKkXCvPXsr9uUDQguVk", - "AfoBNKl270Pf8TMk7ji3d7u7uCZ8islShmncqA3/tzBYG74LvlRt9P/PvHBt1Hv/a238n2sjMNzXBsJn", - "PGJsRGvhP1bGv3e+vXi+DofXAnjeAnAci2fDbvd3AAAA//92bsyPxhcAAA==", + 
"H4sIAAAAAAAC/+xXS2/jNhD+KwTboxM77qYpfEv6WqNNN4jd7SEIBEYay9yVSJaPdA1D/70gKUvUq3K6", + "2RwWOek1nPlm5puH9jjmueAMmFZ4sccq3kJO3O3lzfJnKbm090JyAVJTcF9yldqLpjoDvMDXKsUTrHfC", + "PigtKUtxUUywhL8NlZDgxZ07cj+pjlS6q3P84QPEGhcTfMWTXURzkkKkeXnTehRc6S6s1NCEsBgiFRNr", + "ZY8T2BCTaby4OD2vjf9ayqGVk6sgMJM/gLQQnBWrYMNlTjRe4AfKiNzhWsnSiXTcLs9Gz4cl5wlkEU0a", + "OnCA5NoKoGXSB4ZBSjR9hEhIngs9qOOPUg7deLk+VSb30VeRANmn8CzQZ3LkIqTQDciOVso0pN69Ws/h", + "7DAEBZCEkiv73KdUaQks1dsGvNnpDzXA1UGiE/EWccUBjedEwOFjeTpK8UeaAG8/9lN8I1STSzWcX4Tq", + "jcUWaLptJur84vv63Fv/ve/o55TBZ9E255pyFj2Y+CPotpKz+UWoxUqiKyfZ0Bb4wThVEBGTRgPEmM0D", + "6lphdGlSNMyRJ1DxH5q0zJ3N5m9qc3+5792TLRqOsG+YQj3se7te3wx09gQ0oZm9+1bCBi/wN9N6PkzL", + "4TCtuncbZXk8gFnbGgDynmQ0ITaJo5CohlyNYWvrK2osP3lNFRAiJdk5H0K0bQV9uIFkevvjFuKPXbxK", + "E22aVYrf/YbD1uME+iZmXZO1gR77ruhuQQnOFHQR+C59dMSuIaEkjJNv3H1x6jBShbluwurB7S11I3Zs", + "LRmZhXJ/ymx07zBOxlkIkHogPQjX8EmvuXPkhkjig/eltoy6Mx/Ri1/XgKevAVXvfWKzLcEEhOnyooc8", + "o60s43GjKgnbvdvgxd2+4+O+A/E+KNDfeezMdEp00lnNQamBAe1f1KIOM1rbt2NFZf3wpkrJIFJHtM/3", + "djoNt6+NJHmrfT2xj7ViUm1IXvFIXyvNhy418HYccoyMjaR6t7JQPHY7Sq6ASJDVb5U99OBfVUq2Wgtc", + "WB2UbbivIxVLKlx+F/iSISJERn3CkeZIGoYul0hQARll3p8DL+gjCABpv98axpyhR5DK65qdnp3ObEC4", + "AEYExQv8nXs1wYLorYM93brR4xoduHq0qXHGl0k1mbANmY+HOzWfzewl5kwDc6cC0NMPypo//FuOpTGc", + "fS4wzYCsTByDUhuToSolLgUmz+1qWkG0L6euU51oflKtsoe9uumWq+yywLHnAyhtd6yWX7nJNBVE6qnd", + "iU8Sosnxrh37x1A0OamlgeILRrw5t4+N+QS/ec6sV3tij/0rkqBbnxJndz5/VrudlbGLoBZB1Vp5/lLu", + "L5kGyUiGViAfQaJ69z70HTdDwo5zd1/chzXhUozW3E/jVm24v4XR2nBd8KVqY/h/5oVro9n7X2vja64N", + "z3BXGxo+6SPGRrAW/mdl/H/nu4vn63B4LYDnLQDLsXA2FMW/AQAA//+fOxznFhgAAA==", } // GetSwagger returns the content of the embedded swagger specification file