From 1d435fad5f62dfe280013b3b40f5c53d98b977e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9D=92=E9=BE=8D=E8=81=96=E8=80=85=40bdsqlsz?=
Date: Wed, 6 Dec 2023 21:58:11 +0800
Subject: [PATCH] fix bugs

---
 configs/prompts/animation.yaml         |  4 +--
 demo/animate.py                        |  4 +--
 magicanimate/models/unet_controlnet.py | 35 +++++++++++++++++++++++++-
 magicanimate/pipelines/animation.py    |  6 ++---
 4 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/configs/prompts/animation.yaml b/configs/prompts/animation.yaml
index d4c15903..1e3a16be 100644
--- a/configs/prompts/animation.yaml
+++ b/configs/prompts/animation.yaml
@@ -1,5 +1,5 @@
-pretrained_model_path: "pretrained_models/stable-diffusion-v1-5"
-pretrained_vae_path: "pretrained_models/sd-vae-ft-mse"
+pretrained_model_path: "D:\\sd-webui-aki-v4.1\\models\\Stable-diffusion\\动漫\\cetusMix_v4.safetensors"
+pretrained_vae_path: ""
 pretrained_controlnet_path: "pretrained_models/MagicAnimate/densepose_controlnet"
 pretrained_appearance_encoder_path: "pretrained_models/MagicAnimate/appearance_encoder"
 pretrained_unet_path: ""
diff --git a/demo/animate.py b/demo/animate.py
index a4a13a7a..bed1c645 100644
--- a/demo/animate.py
+++ b/demo/animate.py
@@ -78,7 +78,7 @@ def __init__(self, config="configs/prompts/animation.yaml") -> None:
             )
         else:
             unet = UNet3DConditionModel.from_pretrained_2d(
-                unet,
+                unet.config,
                 subfolder=None,
                 unet_additional_kwargs=OmegaConf.to_container(
                     inference_config.unet_additional_kwargs
@@ -99,7 +99,7 @@ def __init__(self, config="configs/prompts/animation.yaml") -> None:
             mode="read",
             fusion_blocks=config.fusion_blocks,
         )
-        if config.pretrained_vae_path is not None:
+        if config.pretrained_vae_path:
             vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path)
         # else:
         #     vae = AutoencoderKL.from_pretrained(
diff --git a/magicanimate/models/unet_controlnet.py b/magicanimate/models/unet_controlnet.py
index 14b09c39..c0f76bae 100644
--- a/magicanimate/models/unet_controlnet.py
+++ b/magicanimate/models/unet_controlnet.py
@@ -52,7 +52,7 @@ class UNet3DConditionOutput(BaseOutput):
     sample: torch.FloatTensor
 
 
-class UNet3DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin):
+class UNet3DConditionModel(ModelMixin, ConfigMixin):
     _supports_gradient_checkpointing = True
 
     @register_to_config
@@ -571,6 +571,39 @@ def from_pretrained_2d(
             ]
             print(f"### Temporal Module Parameters: {sum(params) / 1e6} M")
         else:
+            config = {
+                "_class_name": "UNet2DConditionModel",
+                "_diffusers_version": "0.6.0",
+                "act_fn": "silu",
+                "attention_head_dim": 8,
+                "block_out_channels": [320, 640, 1280, 1280],
+                "center_input_sample": False,
+                "cross_attention_dim": 768,
+                "down_block_types": [
+                    "CrossAttnDownBlock3D",
+                    "CrossAttnDownBlock3D",
+                    "CrossAttnDownBlock3D",
+                    "DownBlock3D",
+                ],
+                "downsample_padding": 1,
+                "flip_sin_to_cos": True,
+                "freq_shift": 0,
+                "in_channels": 4,
+                "layers_per_block": 2,
+                "mid_block_scale_factor": 1,
+                "norm_eps": 1e-05,
+                "norm_num_groups": 32,
+                "out_channels": 4,
+                "sample_size": 64,
+                "up_block_types": [
+                    "UpBlock3D",
+                    "CrossAttnUpBlock3D",
+                    "CrossAttnUpBlock3D",
+                    "CrossAttnUpBlock3D",
+                ],
+            }
+            config["_class_name"] = cls.__name__
+            model = cls.from_config(config, **unet_additional_kwargs)
             state_dict = pretrained_model_path
             m, u = model.load_state_dict(state_dict, strict=False)
             print(f"### missing keys: {len(m)}; \n### unexpected keys: {len(u)};")
diff --git a/magicanimate/pipelines/animation.py b/magicanimate/pipelines/animation.py
index 33ed132c..7f6cdd41 100644
--- a/magicanimate/pipelines/animation.py
+++ b/magicanimate/pipelines/animation.py
@@ -78,7 +78,7 @@ def main(args):
         v2=False,
         v_pred=False,
     )
-    unet.
+
     # tokenizer = CLIPTokenizer.from_pretrained(
     #     config.pretrained_model_path, subfolder="tokenizer"
     # )
@@ -94,7 +94,7 @@ def main(args):
         )
     else:
         unet = UNet3DConditionModel.from_pretrained_2d(
-            unet,
+            unet.config,
             subfolder=None,
             unet_additional_kwargs=OmegaConf.to_container(
                 inference_config.unet_additional_kwargs
@@ -115,7 +115,7 @@ def main(args):
         mode="read",
         fusion_blocks=config.fusion_blocks,
     )
-    if config.pretrained_vae_path is not None:
+    if config.pretrained_vae_path:
         vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path)
     # else:
     #     vae = AutoencoderKL.from_pretrained(