Add sfast warm-up to the i2i, t2i, and upscale pipelines

livepeer · Jul 28, 2024 · 950cdf9 · 950cdf9
1 parent ef155aa
commit 950cdf9
Show file tree

Hide file tree

Showing 3 changed files with 76 additions and 17 deletions.
diff --git a/runner/app/pipelines/image_to_image.py b/runner/app/pipelines/image_to_image.py
@@ -1,6 +1,7 @@
 import logging
 import os
 from enum import Enum
+import time
 from typing import List, Optional, Tuple
 
 import PIL
@@ -30,6 +31,7 @@
 
 logger = logging.getLogger(__name__)
 
+SFAST_WARMUP_ITERATIONS = 2  # Model warm-up iterations when SFAST is enabled.
 
 class ModelName(Enum):
     """Enumeration mapping model names to their corresponding IDs."""
@@ -142,11 +144,29 @@ def __init__(self, model_id: str):
             # Warm-up the pipeline.
             # TODO: Not yet supported for ImageToImagePipeline.
             if os.getenv("SFAST_WARMUP", "true").lower() == "true":
-                logger.warning(
-                    "The 'SFAST_WARMUP' flag is not yet supported for the "
-                    "ImageToImagePipeline and will be ignored. As a result the first "
-                    "call may be slow if 'SFAST' is enabled."
-                )
+                warmup_kwargs = {  # Hard-coded dummy inputs for the warm-up calls.
+                    "prompt": "A warmed up pipeline is a happy pipeline a short poem by ricksta",
+                    "image": PIL.Image.new("RGB", (576, 1024)),
+                    "strength": 0.8,
+                    "negative_prompt": "No blurry or weird artifacts",
+                    "num_images_per_prompt": 4,
+                }
+
+                logger.info("Warming up ImageToImagePipeline pipeline...")
+                total_time = 0
+                for ii in range(SFAST_WARMUP_ITERATIONS):
+                    t = time.time()
+                    try:
+                        self.ldm(**warmup_kwargs).images
+                    except Exception as e:
+                        logger.error("ImageToImagePipeline warmup error: %s", e)
+                        raise  # Re-raise unchanged so the original traceback is kept.
+                    iteration_time = time.time() - t
+                    total_time += iteration_time
+                    logger.info(
+                        "Warmup iteration %s took %s seconds", ii + 1, iteration_time
+                    )
+                logger.info("Total warmup time: %s seconds", total_time)
 
         if deepcache_enabled and not (
             is_lightning_model(model_id) or is_turbo_model(model_id)

diff --git a/runner/app/pipelines/text_to_image.py b/runner/app/pipelines/text_to_image.py
@@ -1,6 +1,7 @@
 import logging
 import os
 from enum import Enum
+import time
 from typing import List, Optional, Tuple
 
 import PIL
@@ -26,6 +27,7 @@
 
 logger = logging.getLogger(__name__)
 
+SFAST_WARMUP_ITERATIONS = 2  # Model warm-up iterations when SFAST is enabled.
 
 class ModelName(Enum):
     """Enumeration mapping model names to their corresponding IDs."""
@@ -151,14 +153,31 @@ def __init__(self, model_id: str):
 
             self.ldm = compile_model(self.ldm)
 
-            # Warm-up the pipeline.
-            # TODO: Not yet supported for TextToImagePipeline.
             if os.getenv("SFAST_WARMUP", "true").lower() == "true":
-                logger.warning(
-                    "The 'SFAST_WARMUP' flag is not yet supported for the "
-                    "TextToImagePipeline and will be ignored. As a result the first "
-                    "call may be slow if 'SFAST' is enabled."
-                )
+                # Hard-coded dummy inputs used only for the warm-up calls.
+                # TODO: Retrieve defaults from Pydantic class in route.
+                warmup_kwargs = {
+                    "prompt": "A happy pipe in the line looking at the wall with words sfast",
+                    "num_images_per_prompt": 4,
+                    "negative_prompt": "No blurry or weird artifacts",
+                }
+
+                logger.info("Warming up TextToImagePipeline pipeline...")
+                total_time = 0
+                for ii in range(SFAST_WARMUP_ITERATIONS):
+                    t = time.time()
+                    try:
+                        self.ldm(**warmup_kwargs).images
+                    except Exception as e:
+                        # FIXME: When out of memory, pipeline is corrupted.
+                        logger.error("TextToImagePipeline warmup error: %s", e)
+                        raise  # Re-raise unchanged so the original traceback is kept.
+                    iteration_time = time.time() - t
+                    total_time += iteration_time
+                    logger.info(
+                        "Warmup iteration %s took %s seconds", ii + 1, iteration_time
+                    )
+                logger.info("Total warmup time: %s seconds", total_time)
 
         if deepcache_enabled and not (
             is_lightning_model(model_id) or is_turbo_model(model_id)

diff --git a/runner/app/pipelines/upscale.py b/runner/app/pipelines/upscale.py
@@ -1,5 +1,6 @@
 import logging
 import os
+import time
 from typing import List, Optional, Tuple
 
 import PIL
@@ -21,6 +22,7 @@
 
 logger = logging.getLogger(__name__)
 
+SFAST_WARMUP_ITERATIONS = 2  # Model warm-up iterations when SFAST is enabled.
 
 class UpscalePipeline(Pipeline):
     def __init__(self, model_id: str):
@@ -68,11 +70,29 @@ def __init__(self, model_id: str):
             # Warm-up the pipeline.
             # TODO: Not yet supported for UpscalePipeline.
             if os.getenv("SFAST_WARMUP", "true").lower() == "true":
-                logger.warning(
-                    "The 'SFAST_WARMUP' flag is not yet supported for the "
-                    "UpscalePipeline and will be ignored. As a result the first "
-                    "call may be slow if 'SFAST' is enabled."
-                )
+                # Hard-coded dummy inputs used only for the warm-up calls.
+                # TODO: Retrieve defaults from Pydantic class in route.
+                warmup_kwargs = {
+                    "prompt": "Upscaling the pipeline with sfast enabled",
+                    "image": PIL.Image.new("RGB", (576, 1024)),
+                }
+
+                logger.info("Warming up UpscalePipeline pipeline...")
+                total_time = 0
+                for ii in range(SFAST_WARMUP_ITERATIONS):
+                    t = time.time()
+                    try:
+                        self.ldm(**warmup_kwargs).images
+                    except Exception as e:
+                        # FIXME: When out of memory, pipeline is corrupted.
+                        logger.error("UpscalePipeline warmup error: %s", e)
+                        raise  # Re-raise unchanged so the original traceback is kept.
+                    iteration_time = time.time() - t
+                    total_time += iteration_time
+                    logger.info(
+                        "Warmup iteration %s took %s seconds", ii + 1, iteration_time
+                    )
+                logger.info("Total warmup time: %s seconds", total_time)
 
         if deepcache_enabled and not (
             is_lightning_model(model_id) or is_turbo_model(model_id)