finegrain-ai · catwell · Jan 19, 2024 · Jan 19, 2024
diff --git a/README.md b/README.md
@@ -117,10 +117,9 @@ t2i_adapter = SDXLT2IAdapter(
 
 # Tune parameters
 seed = 9752
-first_step = 1
 ip_adapter.set_scale(0.85)
 t2i_adapter.set_scale(0.8)
-sdxl.set_num_inference_steps(50)
+sdxl.set_inference_steps(50, first_step=1)
 sdxl.set_self_attention_guidance(enable=True, scale=0.75)
 
 with no_grad():
@@ -136,11 +135,11 @@ with no_grad():
     t2i_adapter.set_condition_features(features=t2i_adapter.compute_condition_features(condition))
 
     manual_seed(seed=seed)
-    x = sdxl.init_latents(size=(1024, 1024), init_image=init_image, first_step=first_step).to(
+    x = sdxl.init_latents(size=(1024, 1024), init_image=init_image).to(
         device=sdxl.device, dtype=sdxl.dtype
     )
 
-    for step in sdxl.steps[first_step:]:
+    for step in sdxl.steps:
         x = sdxl(
             x,
             step=step,

diff --git a/src/refiners/foundationals/latent_diffusion/model.py b/src/refiners/foundationals/latent_diffusion/model.py
@@ -32,21 +32,21 @@ def __init__(
         self.clip_text_encoder = clip_text_encoder.to(device=self.device, dtype=self.dtype)
         self.scheduler = scheduler.to(device=self.device, dtype=self.dtype)
 
-    def set_num_inference_steps(self, num_inference_steps: int) -> None:
+    def set_inference_steps(self, num_steps: int, first_step: int = 0) -> None:
         initial_diffusion_rate = self.scheduler.initial_diffusion_rate
         final_diffusion_rate = self.scheduler.final_diffusion_rate
         device, dtype = self.scheduler.device, self.scheduler.dtype
         self.scheduler = self.scheduler.__class__(
-            num_inference_steps,
+            num_inference_steps=num_steps,
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
+            first_inference_step=first_step,
         ).to(device=device, dtype=dtype)
 
     def init_latents(
         self,
         size: tuple[int, int],
         init_image: Image.Image | None = None,
-        first_step: int = 0,
         noise: Tensor | None = None,
     ) -> Tensor:
         height, width = size
@@ -59,11 +59,15 @@ def init_latents(
         if init_image is None:
             return noise
         encoded_image = self.lda.encode_image(image=init_image.resize(size=(width, height)))
-        return self.scheduler.add_noise(x=encoded_image, noise=noise, step=self.steps[first_step])
+        return self.scheduler.add_noise(
+            x=encoded_image,
+            noise=noise,
+            step=self.scheduler.first_inference_step,
+        )
 
     @property
     def steps(self) -> list[int]:
-        return self.scheduler.steps
+        return self.scheduler.inference_steps
 
     @abstractmethod
     def set_unet_context(self, *, timestep: Tensor, clip_text_embedding: Tensor, **_: Tensor) -> None:

diff --git a/src/refiners/foundationals/latent_diffusion/schedulers/ddim.py b/src/refiners/foundationals/latent_diffusion/schedulers/ddim.py
@@ -11,6 +11,7 @@ def __init__(
         initial_diffusion_rate: float = 8.5e-4,
         final_diffusion_rate: float = 1.2e-2,
         noise_schedule: NoiseSchedule = NoiseSchedule.QUADRATIC,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
         dtype: Dtype = float32,
     ) -> None:
@@ -20,6 +21,7 @@ def __init__(
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
             noise_schedule=noise_schedule,
+            first_inference_step=first_inference_step,
             device=device,
             dtype=dtype,
         )
@@ -35,6 +37,8 @@ def _generate_timesteps(self) -> Tensor:
         return timesteps.flip(0)
 
     def __call__(self, x: Tensor, noise: Tensor, step: int, generator: Generator | None = None) -> Tensor:
+        assert self.first_inference_step <= step < self.num_inference_steps, f"invalid step {step}"
+
         timestep, previous_timestep = (
             self.timesteps[step],
             (

diff --git a/src/refiners/foundationals/latent_diffusion/schedulers/ddpm.py b/src/refiners/foundationals/latent_diffusion/schedulers/ddpm.py
@@ -15,13 +15,15 @@ def __init__(
         num_train_timesteps: int = 1_000,
         initial_diffusion_rate: float = 8.5e-4,
         final_diffusion_rate: float = 1.2e-2,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
     ) -> None:
         super().__init__(
             num_inference_steps=num_inference_steps,
             num_train_timesteps=num_train_timesteps,
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
+            first_inference_step=first_inference_step,
             device=device,
         )
 

diff --git a/src/refiners/foundationals/latent_diffusion/schedulers/dpm_solver.py b/src/refiners/foundationals/latent_diffusion/schedulers/dpm_solver.py
@@ -24,6 +24,7 @@ def __init__(
         final_diffusion_rate: float = 1.2e-2,
         last_step_first_order: bool = False,
         noise_schedule: NoiseSchedule = NoiseSchedule.QUADRATIC,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
         dtype: Dtype = float32,
     ):
@@ -33,6 +34,7 @@ def __init__(
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
             noise_schedule=noise_schedule,
+            first_inference_step=first_inference_step,
             device=device,
             dtype=dtype,
         )
@@ -100,12 +102,14 @@ def __call__(self, x: Tensor, noise: Tensor, step: int, generator: Generator | N
         backward Euler update, which is a numerical method commonly used to solve ordinary differential equations
         (ODEs).
         """
+        assert self.first_inference_step <= step < self.num_inference_steps, f"invalid step {step}"
+
         current_timestep = self.timesteps[step]
         scale_factor, noise_ratio = self.cumulative_scale_factors[current_timestep], self.noise_std[current_timestep]
         estimated_denoised_data = (x - noise_ratio * noise) / scale_factor
         self.estimated_data.append(estimated_denoised_data)
 
-        if step == 0 or (self.last_step_first_order and step == self.num_inference_steps - 1):
+        if step == self.first_inference_step or (self.last_step_first_order and step == self.num_inference_steps - 1):
             return self.dpm_solver_first_order_update(x=x, noise=estimated_denoised_data, step=step)
 
         return self.multistep_dpm_solver_second_order_update(x=x, step=step)
diff --git a/src/refiners/foundationals/latent_diffusion/schedulers/euler.py b/src/refiners/foundationals/latent_diffusion/schedulers/euler.py
@@ -13,6 +13,7 @@ def __init__(
         initial_diffusion_rate: float = 8.5e-4,
         final_diffusion_rate: float = 1.2e-2,
         noise_schedule: NoiseSchedule = NoiseSchedule.QUADRATIC,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
         dtype: Dtype = float32,
     ):
@@ -24,6 +25,7 @@ def __init__(
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
             noise_schedule=noise_schedule,
+            first_inference_step=first_inference_step,
             device=device,
             dtype=dtype,
         )
@@ -64,6 +66,8 @@ def __call__(
         s_tmax: float = float("inf"),
         s_noise: float = 1.0,
     ) -> Tensor:
+        assert self.first_inference_step <= step < self.num_inference_steps, f"invalid step {step}"
+
         sigma = self.sigmas[step]
 
         gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0

diff --git a/src/refiners/foundationals/latent_diffusion/schedulers/scheduler.py b/src/refiners/foundationals/latent_diffusion/schedulers/scheduler.py
@@ -33,6 +33,7 @@ def __init__(
         initial_diffusion_rate: float = 8.5e-4,
         final_diffusion_rate: float = 1.2e-2,
         noise_schedule: NoiseSchedule = NoiseSchedule.QUADRATIC,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
         dtype: DType = float32,
     ):
@@ -43,6 +44,7 @@ def __init__(
         self.initial_diffusion_rate = initial_diffusion_rate
         self.final_diffusion_rate = final_diffusion_rate
         self.noise_schedule = noise_schedule
+        self.first_inference_step = first_inference_step
         self.scale_factors = self.sample_noise_schedule()
         self.cumulative_scale_factors = sqrt(self.scale_factors.cumprod(dim=0))
         self.noise_std = sqrt(1.0 - self.scale_factors.cumprod(dim=0))
@@ -68,9 +70,13 @@ def _generate_timesteps(self) -> Tensor:
         ...
 
     @property
-    def steps(self) -> list[int]:
+    def all_steps(self) -> list[int]:
         return list(range(self.num_inference_steps))
 
+    @property
+    def inference_steps(self) -> list[int]:
+        return self.all_steps[self.first_inference_step :]
+
     def scale_model_input(self, x: Tensor, step: int) -> Tensor:
         """
         For compatibility with schedulers that need to scale the input according to the current timestep.