diff --git a/runner/app/pipelines/image_to_video.py b/runner/app/pipelines/image_to_video.py
index 5967a672..529821ba 100644
--- a/runner/app/pipelines/image_to_video.py
+++ b/runner/app/pipelines/image_to_video.py
@@ -8,6 +8,7 @@
 from typing import List
 import logging
 import os
+import time
 
 from PIL import ImageFile
 
@@ -15,6 +16,9 @@
 
 logger = logging.getLogger(__name__)
 
+WARMUP_ITERATIONS = 3  # Number of warm-up calls when SFAST is enabled.
+WARMUP_BATCH_SIZE = 3  # Max batch size for warm-up calls when SFAST is enabled.
+
 
 class ImageToVideoPipeline(Pipeline):
     def __init__(self, model_id: str):
@@ -49,6 +53,31 @@ def __init__(self, model_id: str):
 
             self.ldm = compile_model(self.ldm)
 
+            # Retrieve default model params.
+            warmup_kwargs = {
+                "image": PIL.Image.new("RGB", (512, 512)),
+                "height": 512,
+                "width": 512,
+            }
+
+            # NOTE: Warm up the pipeline.
+            # The initial calls will trigger compilation and might be very slow.
+            # After that, it should be very fast.
+            # FIXME: This will crash the pipeline if there is not enough VRAM available.
+            logger.info("Warming up pipeline...")
+            for ii in range(WARMUP_ITERATIONS):
+                logger.info(f"Warmup iteration {ii + 1}...")
+                t = time.time()
+                try:
+                    self.ldm(**warmup_kwargs).frames
+                except Exception as e:
+                    logger.error(f"ImageToVideoPipeline warmup error: {e}")
+                    logger.exception(e)
+                    # FIXME: On CUDA OOM the full model must be reloaded before it
+                    # works again; torch.cuda.empty_cache() is not enough.
+                    raise e
+                logger.info("Warmup iteration took %s seconds", time.time() - t)
+
     def __call__(self, image: PIL.Image, **kwargs) -> List[List[PIL.Image]]:
         if "decode_chunk_size" not in kwargs:
             kwargs["decode_chunk_size"] = 4
@@ -64,7 +93,11 @@ def __call__(self, image: PIL.Image, **kwargs) -> List[List[PIL.Image]]:
                     torch.Generator(get_torch_device()).manual_seed(s) for s in seed
                 ]
 
-        return self.ldm(image, **kwargs).frames
+        t = time.time()
+        frames = self.ldm(image, **kwargs).frames
+        logger.info("ImageToVideoPipeline took %s seconds", time.time() - t)
+
+        return frames
 
     def __str__(self) -> str:
         return f"ImageToVideoPipeline model_id={self.model_id}"
diff --git a/runner/app/pipelines/text_to_image.py b/runner/app/pipelines/text_to_image.py
index 4b05e871..ea8c2da8 100644
--- a/runner/app/pipelines/text_to_image.py
+++ b/runner/app/pipelines/text_to_image.py
@@ -14,11 +14,13 @@
 from typing import List
 import logging
 import os
+import time
 
 logger = logging.getLogger(__name__)
 
 SDXL_LIGHTNING_MODEL_ID = "ByteDance/SDXL-Lightning"
-
+WARMUP_ITERATIONS = 3  # Number of warm-up calls when SFAST is enabled.
+WARMUP_BATCH_SIZE = 3  # Max batch size for warm-up calls when SFAST is enabled.
 
 class TextToImagePipeline(Pipeline):
     def __init__(self, model_id: str):
@@ -117,9 +119,58 @@ def __init__(self, model_id: str):
                 model_id,
             )
             from app.pipelines.sfast import compile_model
+            from app.routes.text_to_image import TextToImageParams
 
             self.ldm = compile_model(self.ldm)
 
+            # Retrieve default model params.
+            warmup_kwargs = TextToImageParams(
+                prompt="A warmed up pipeline is a happy pipeline"
+            )
+            if (
+                self.model_id == "stabilityai/sdxl-turbo"
+                or self.model_id == "stabilityai/sd-turbo"
+            ):
+                # SD turbo models were trained without guidance_scale, so it
+                # should be set to 0.
+                warmup_kwargs.guidance_scale = 0.0
+                warmup_kwargs.num_inference_steps = 1
+            elif SDXL_LIGHTNING_MODEL_ID in self.model_id:
+                # SDXL-Lightning models should have guidance_scale = 0 and use
+                # the number of inference steps matching the loaded unet checkpoint.
+                warmup_kwargs.guidance_scale = 0.0
+
+                if "2step" in self.model_id:
+                    warmup_kwargs.num_inference_steps = 2
+                elif "4step" in self.model_id:
+                    warmup_kwargs.num_inference_steps = 4
+                elif "8step" in self.model_id:
+                    warmup_kwargs.num_inference_steps = 8
+                else:
+                    # Default to the 2-step checkpoint.
+                    warmup_kwargs.num_inference_steps = 2
+
+            # NOTE: Warm up the pipeline.
+            # The initial calls will trigger compilation and might be very slow.
+            # After that, it should be very fast.
+            # FIXME: This will crash the pipeline if there is not enough VRAM available.
+            logger.info("Warming up pipeline...")
+            for batch in range(WARMUP_BATCH_SIZE):
+                warmup_kwargs.num_images_per_prompt = batch + 1
+                logger.info(f"Warmup with batch size {batch + 1}...")
+                for ii in range(WARMUP_ITERATIONS):
+                    logger.info(f"Warmup iteration {ii + 1}...")
+                    t = time.time()
+                    try:
+                        self.ldm(**warmup_kwargs.model_dump()).images[0]
+                    except Exception as e:
+                        logger.error(f"TextToImagePipeline warmup error: {e}")
+                        logger.exception(e)
+                        # FIXME: On CUDA OOM the full model must be reloaded before it
+                        # works again; torch.cuda.empty_cache() is not enough.
+                        raise e
+                    logger.info("Warmup iteration took %s seconds", time.time() - t)
+
     def __call__(self, prompt: str, **kwargs) -> List[PIL.Image]:
         seed = kwargs.pop("seed", None)
         if seed is not None:
@@ -157,7 +208,11 @@ def __call__(self, prompt: str, **kwargs) -> List[PIL.Image]:
                 # Default to 2step
                 kwargs["num_inference_steps"] = 2
 
-        return self.ldm(prompt, **kwargs).images
+        t = time.time()
+        images = self.ldm(prompt, **kwargs).images
+        logger.info("TextToImagePipeline took %s seconds", time.time() - t)
+
+        return images
 
     def __str__(self) -> str:
         return f"TextToImagePipeline model_id={self.model_id}"
diff --git a/runner/app/routes/text_to_image.py b/runner/app/routes/text_to_image.py
index e811cc9a..d3f9f3c3 100644
--- a/runner/app/routes/text_to_image.py
+++ b/runner/app/routes/text_to_image.py
@@ -25,6 +25,9 @@ class TextToImageParams(BaseModel):
     negative_prompt: str = ""
     seed: int = None
     num_images_per_prompt: int = 1
+    # Model-specific parameters.
+    # These are not used by all models.
+    num_inference_steps: int = 1
 
 
 responses = {400: {"model": HTTPError}, 500: {"model": HTTPError}}
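The warm-up loop above is duplicated in ImageToVideoPipeline and TextToImagePipeline. Purely as an illustration of the pattern (not part of this diff), it could live in one shared helper; the warmup_pipeline() function below and its signature are hypothetical and assume the compiled pipeline object is callable with keyword arguments, exactly as in the hunks above:

# Hypothetical shared helper -- a sketch only, not part of this diff.
import logging
import time
from typing import Any, Callable

logger = logging.getLogger(__name__)

WARMUP_ITERATIONS = 3  # Number of warm-up calls when SFAST is enabled.


def warmup_pipeline(
    ldm: Callable[..., Any], pipeline_name: str, **warmup_kwargs: Any
) -> None:
    """Run a few throwaway inferences so stable-fast compiles the model up front."""
    logger.info("Warming up %s...", pipeline_name)
    for ii in range(WARMUP_ITERATIONS):
        t = time.time()
        try:
            ldm(**warmup_kwargs)
        except Exception:
            # A CUDA OOM here leaves the model unusable until it is reloaded,
            # so fail loudly instead of serving requests with a broken pipeline.
            logger.exception("%s warmup error", pipeline_name)
            raise
        logger.info("Warmup iteration %d took %s seconds", ii + 1, time.time() - t)

Each pipeline's __init__ could then call warmup_pipeline(self.ldm, "TextToImagePipeline", **warmup_kwargs.model_dump()) right after compile_model(), keeping the two files from drifting apart.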
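The num_inference_steps field added to TextToImageParams becomes part of the request body accepted by the text-to-image route. A minimal client-side sketch, assuming the runner's FastAPI app is reachable at http://localhost:8000 and the route is mounted at /text-to-image (neither the host nor the path appears in this diff):

import requests  # illustrative client; endpoint and port are assumptions

resp = requests.post(
    "http://localhost:8000/text-to-image",
    json={
        "prompt": "A warmed up pipeline is a happy pipeline",
        "num_images_per_prompt": 1,
        # New in this diff; models that do not use it ignore the value.
        "num_inference_steps": 4,
    },
)
resp.raise_for_status()
print(resp.json())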