ByteDance/AnimateDiff-Lightning text2vid support

livepeer · Apr 11, 2024 · 02dcf48
1 parent d7b9971
commit 02dcf48
Show file tree

Hide file tree

Showing 2 changed files with 25 additions and 7 deletions.
diff --git a/runner/app/pipelines/text_to_video.py b/runner/app/pipelines/text_to_video.py
@@ -1,8 +1,9 @@
 from app.pipelines.base import Pipeline
 from app.pipelines.util import get_torch_device, get_model_dir
 
-from diffusers import DiffusionPipeline
-from huggingface_hub import file_download
+from diffusers import AnimateDiffPipeline, MotionAdapter, DiffusionPipeline, EulerDiscreteScheduler
+from huggingface_hub import file_download, hf_hub_download
+from safetensors.torch import load_file
 import torch
 import PIL
 from typing import List
@@ -37,8 +38,17 @@ def __init__(self, model_id: str):
  kwargs["variant"] = "fp16"
 
  self.model_id = model_id
- self.ldm = DiffusionPipeline.from_pretrained(model_id, **kwargs)
- self.ldm.to(get_torch_device())
+
+ if self.model_id == "ByteDance/AnimateDiff-Lightning":
+ adapter = MotionAdapter().to(torch_device, torch.float16)
+ adapter.load_state_dict(load_file(hf_hub_download("ByteDance/AnimateDiff-Lightning" ,f"animatediff_lightning_4step_diffusers.safetensors"), device="cuda"))
+ kwargs["motion_adapter"] = adapter
+ self.ldm = AnimateDiffPipeline.from_pretrained("Lykon/DreamShaper", **kwargs)
+ self.ldm.scheduler = EulerDiscreteScheduler.from_config(self.ldm.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
+ self.ldm.to(torch_device)
+ else:
+ self.ldm = DiffusionPipeline.from_pretrained(model_id, **kwargs)
+ self.ldm.to(torch_device)
 
  if os.environ.get("SFAST"):
  logger.info(
@@ -51,9 +61,15 @@ def __init__(self, model_id: str):
 
  def __call__(self, prompt: str, **kwargs) -> List[List[PIL.Image]]:
  # ali-vilab/text-to-video-ms-1.7b has a limited parameter set
- if (
- self.model_id == "ali-vilab/text-to-video-ms-1.7b"
- ):
+ if self.model_id == "ali-vilab/text-to-video-ms-1.7b":
+ if "fps" in kwargs:
+ del kwargs["fps"]
+ if "motion_bucket_id" in kwargs:
+ del kwargs["motion_bucket_id"]
+ if "noise_aug_strength" in kwargs:
+ del kwargs["noise_aug_strength"]
+ elif self.model_id == "ByteDance/AnimateDiff-Lightning":
+ kwargs["step"] = 4
  if "fps" in kwargs:
  del kwargs["fps"]
  if "motion_bucket_id" in kwargs:

diff --git a/runner/dl_checkpoints.sh b/runner/dl_checkpoints.sh
@@ -82,6 +82,8 @@ else
 
  # Download text-to-video models.
  huggingface-cli download ali-vilab/text-to-video-ms-1.7b --include "*.fp16.safetensors" "*.json" --cache-dir models
+ huggingface-cli download ByteDance/AnimateDiff-Lightning --include "*.fp16.safetensors" "*.json" --cache-dir models
+ huggingface-cli download Lykon/DreamShaper --include "*.fp16.safetensors" "*.json" --cache-dir models
 
  # Download image-to-video models (token-gated).
  printf "\nDownloading token-gated models...\n"