
Commit

Merge branch 'video2openpose2' of https://github.com/sdbds/magic-anim…
sdbds committed Dec 9, 2023
2 parents d61200a + e1416b5 commit 8657110
Showing 12 changed files with 176 additions and 55 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -12,4 +12,5 @@ pretrained_models
demo/tmp
demo/outputs
huggingface/
venv/
venv/
outputs/
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "video_controlnet_aux"]
path = video_controlnet_aux
url = https://github.com/sdbds/video_controlnet_aux
3 changes: 3 additions & 0 deletions configs/prompts/animation.yaml
@@ -1,6 +1,7 @@
pretrained_model_path: "pretrained_models/stable-diffusion-v1-5"
pretrained_vae_path: ""
pretrained_controlnet_path: "pretrained_models/MagicAnimate/densepose_controlnet"
openpose_path: "pretrained_models/control_v11p_sd15_openpose"
pretrained_appearance_encoder_path: "pretrained_models/MagicAnimate/appearance_encoder"
pretrained_unet_path: ""

@@ -40,3 +41,5 @@ max_length: null
video_type: "condition"
invert_video: false
save_individual_videos: false

openpose: false
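
The new openpose_path key is consumed in demo/animate.py (diff below): when the OpenPose model is requested, the ControlNet weights are loaded from this path instead of the DensePose checkpoint. A minimal sketch of that lookup, mirroring the code further down; the standalone snippet and the hard-coded controlnet_model value are illustrative, not part of the commit:

    from omegaconf import OmegaConf
    from magicanimate.models.controlnet import ControlNetModel

    config = OmegaConf.load("configs/prompts/animation.yaml")
    controlnet_model = "openpose"  # normally chosen in the Gradio UI

    # Same branch as MagicAnimate.__init__: pick the checkpoint by model name.
    if "openpose" in controlnet_model:
        controlnet = ControlNetModel.from_pretrained(config.openpose_path)
    else:
        controlnet = ControlNetModel.from_pretrained(config.pretrained_controlnet_path)
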
69 changes: 46 additions & 23 deletions demo/animate.py
@@ -29,7 +29,7 @@
from magicanimate.models.controlnet import ControlNetModel
from magicanimate.models.appearance_encoder import AppearanceEncoderModel
from magicanimate.models.mutual_self_attention import ReferenceAttentionControl
from magicanimate.models.model_util import load_models
from magicanimate.models.model_util import load_models, torch_gc
from magicanimate.pipelines.pipeline_animation import AnimationPipeline
from magicanimate.utils.util import save_videos_grid
from accelerate.utils import set_seed
@@ -44,19 +44,23 @@
from pathlib import Path

class MagicAnimate:
def __init__(self, config="configs/prompts/animation.yaml") -> None:
def __init__(self, config="configs/prompts/animation.yaml",controlnet_model="densepose") -> None:
print("Initializing MagicAnimate Pipeline...")
*_, func_args = inspect.getargvalues(inspect.currentframe())
func_args = dict(func_args)

self.config = config

config = OmegaConf.load(config)

inference_config = OmegaConf.load(config.inference_config)

motion_module = config.motion_module

self.controlnet_model = controlnet_model

### >>> create animation pipeline >>> ###
tokenizer, text_encoder, unet, noise_scheduler, vae = load_models(
self.tokenizer, self.text_encoder, self.unet, noise_scheduler, self.vae = load_models(
config.pretrained_model_path,
scheduler_name="",
v2=False,
@@ -69,15 +73,15 @@ def __init__(self, config="configs/prompts/animation.yaml") -> None:
# config.pretrained_model_path, subfolder="text_encoder"
# )
if config.pretrained_unet_path:
unet = UNet3DConditionModel.from_pretrained_2d(
self.unet = UNet3DConditionModel.from_pretrained_2d(
config.pretrained_unet_path,
unet_additional_kwargs=OmegaConf.to_container(
inference_config.unet_additional_kwargs
),
)
else:
unet = UNet3DConditionModel.from_pretrained_2d(
unet.config,
self.unet = UNet3DConditionModel.from_pretrained_2d(
self.unet.config,
subfolder=None,
unet_additional_kwargs=OmegaConf.to_container(
inference_config.unet_additional_kwargs
@@ -93,38 +97,44 @@ def __init__(self, config="configs/prompts/animation.yaml") -> None:
fusion_blocks=config.fusion_blocks,
)
self.reference_control_reader = ReferenceAttentionControl(
unet,
self.unet,
do_classifier_free_guidance=True,
mode="read",
fusion_blocks=config.fusion_blocks,
)

if config.pretrained_vae_path:
vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path)
self.vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path)
# else:
# vae = AutoencoderKL.from_pretrained(
# config.pretrained_model_path, subfolder="vae"
# )

### Load controlnet
controlnet = ControlNetModel.from_pretrained(config.pretrained_controlnet_path)
if "openpose" in self.controlnet_model:
self.controlnet = ControlNetModel.from_pretrained(config.openpose_path)
print("Using OpenPose ControlNet")
else:
self.controlnet = ControlNetModel.from_pretrained(config.pretrained_controlnet_path)
print("Using Densepose ControlNet")


vae.to(torch.float16)
unet.to(torch.float16)
text_encoder.to(torch.float16)
controlnet.to(torch.float16)
self.vae.to(torch.float16)
self.unet.to(torch.float16)
self.text_encoder.to(torch.float16)
self.controlnet.to(torch.float16)
self.appearance_encoder.to(torch.float16)

unet.enable_xformers_memory_efficient_attention()
self.unet.enable_xformers_memory_efficient_attention()
self.appearance_encoder.enable_xformers_memory_efficient_attention()
controlnet.enable_xformers_memory_efficient_attention()
self.controlnet.enable_xformers_memory_efficient_attention()

self.pipeline = AnimationPipeline(
vae=vae,
text_encoder=text_encoder,
tokenizer=tokenizer,
unet=unet,
controlnet=controlnet,
vae=self.vae,
text_encoder=self.text_encoder,
tokenizer=self.tokenizer,
unet=self.unet,
controlnet=self.controlnet,
scheduler=DDIMScheduler(
**OmegaConf.to_container(inference_config.noise_scheduler_kwargs)
),
@@ -165,7 +175,7 @@ def __init__(self, config="configs/prompts/animation.yaml") -> None:
_tmp_[_key] = motion_module_state_dict[key]
else:
_tmp_[key] = motion_module_state_dict[key]
missing, unexpected = unet.load_state_dict(_tmp_, strict=False)
missing, unexpected = self.unet.load_state_dict(_tmp_, strict=False)
assert len(unexpected) == 0
del _tmp_
del motion_module_state_dict
@@ -175,9 +185,20 @@ def __init__(self, config="configs/prompts/animation.yaml") -> None:

print("Initialization Done!")

def reset_init(instance, *args, **kwargs):
instance.__init__(*args, **kwargs)

def __call__(
self, source_image, motion_sequence, random_seed, step, guidance_scale, size=512
self, source_image, motion_sequence, random_seed, step, guidance_scale, controlnet_model="densepose", size=512,
):
if self.controlnet_model != controlnet_model:
self.vae.to("cpu")
self.unet.to("cpu")
self.text_encoder.to("cpu")
self.controlnet.to("cpu")
self.appearance_encoder.to("cpu")
torch_gc()
self.reset_init(config="configs/prompts/animation.yaml", controlnet_model=controlnet_model)
prompt = n_prompt = ""
random_seed = int(random_seed)
step = int(step)
@@ -252,3 +273,5 @@ def __call__(
save_videos_grid(samples_per_video, animation_path)

return animation_path
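
Taken together, the animate.py changes let the demo switch ControlNets without restarting: __call__ now takes a controlnet_model argument, and if it differs from the model the pipeline was built with, the loaded weights are moved to CPU, torch_gc() releases the VRAM, and reset_init rebuilds the pipeline with the other checkpoint. A minimal usage sketch, assuming the repository root is on PYTHONPATH; the reference image and the conditioning video paths are illustrative (the latter would be produced by run_VidControlnetAux_gui.ps1):

    import numpy as np
    from PIL import Image
    from demo.animate import MagicAnimate

    # Build the pipeline with the default DensePose ControlNet.
    animator = MagicAnimate(config="configs/prompts/animation.yaml",
                            controlnet_model="densepose")

    # The Gradio demo passes the reference image as a 512x512 array.
    source = np.array(Image.open("reference.png").resize((512, 512)))

    # First call: DensePose conditioning, nothing is reloaded.
    path = animator(source, "outputs/densepose.mp4", 1, 25, 7.5,
                    controlnet_model="densepose")

    # Different model name: the current weights go to CPU, torch_gc() frees
    # VRAM, and __init__ runs again with the OpenPose ControlNet from the
    # new openpose_path config key before animating.
    path = animator(source, "outputs/openpose.mp4", 1, 25, 7.5,
                    controlnet_model="openpose")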


82 changes: 54 additions & 28 deletions demo/gradio_animate.py
@@ -18,11 +18,26 @@

animator = MagicAnimate()

def animate(reference_image, motion_sequence_state, seed, steps, guidance_scale):
return animator(reference_image, motion_sequence_state, seed, steps, guidance_scale)

with gr.Blocks() as demo:
def animate(
reference_image,
motion_sequence_state,
seed,
steps,
guidance_scale,
controlnet_model,
):
return animator(
reference_image,
motion_sequence_state,
seed,
steps,
guidance_scale,
controlnet_model,
)


with gr.Blocks() as demo:
gr.HTML(
"""
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
@@ -38,44 +53,55 @@ def animate(reference_image, motion_sequence_state, seed, steps, guidance_scale)
</div>
</div>
</div>
""")
"""
)
animation = gr.Video(format="mp4", label="Animation Results", autoplay=True)

with gr.Row():
reference_image = gr.Image(label="Reference Image")
motion_sequence = gr.Video(format="mp4", label="Motion Sequence")
reference_image = gr.Image(label="Reference Image")
motion_sequence = gr.Video(format="mp4", label="Motion Sequence")

with gr.Column():
random_seed = gr.Textbox(label="Random seed", value=1, info="default: -1")
sampling_steps = gr.Textbox(label="Sampling steps", value=25, info="default: 25")
guidance_scale = gr.Textbox(label="Guidance scale", value=7.5, info="default: 7.5")
submit = gr.Button("Animate")
random_seed = gr.Textbox(label="Random seed", value=1, info="default: -1")
sampling_steps = gr.Textbox(
label="Sampling steps", value=25, info="default: 25"
)
guidance_scale = gr.Textbox(
label="Guidance scale", value=7.5, info="default: 7.5"
)
submit = gr.Button("Animate")

def read_video(video):
reader = imageio.get_reader(video)
fps = reader.get_meta_data()['fps']
fps = reader.get_meta_data()["fps"]
return video

def read_image(image, size=512):
return np.array(Image.fromarray(image).resize((size, size)))

# when user uploads a new video
motion_sequence.upload(
read_video,
motion_sequence,
motion_sequence
)
motion_sequence.upload(read_video, motion_sequence, motion_sequence)
# when `first_frame` is updated
reference_image.upload(
read_image,
reference_image,
reference_image
)
reference_image.upload(read_image, reference_image, reference_image)
# when the `submit` button is clicked
submit.click(
animate,
[reference_image, motion_sequence, random_seed, sampling_steps, guidance_scale],
animation
[
reference_image,
motion_sequence,
random_seed,
sampling_steps,
guidance_scale,
gr.Radio(
[
"densepose",
"openpose", # "animalpose"
],
label="Controlnet Model",
value="densepose",
),
],
animation,
)

# Examples
@@ -93,4 +119,4 @@ def read_image(image, size=512):
)


demo.launch(share=True)
demo.launch(share=True)
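
In the new click wiring, the ControlNet selector is a gr.Radio created inline inside the inputs list of submit.click. Below is a sketch of an alternative arrangement (not what this commit does) that declares the selector with the other controls so it renders in the settings column; it assumes the components defined earlier in the same Blocks context:

    with gr.Column():
        random_seed = gr.Textbox(label="Random seed", value=1, info="default: -1")
        sampling_steps = gr.Textbox(label="Sampling steps", value=25, info="default: 25")
        guidance_scale = gr.Textbox(label="Guidance scale", value=7.5, info="default: 7.5")
        controlnet_model = gr.Radio(["densepose", "openpose"],
                                    label="Controlnet Model", value="densepose")
        submit = gr.Button("Animate")

    submit.click(
        animate,
        [reference_image, motion_sequence, random_seed, sampling_steps,
         guidance_scale, controlnet_model],
        animation,
    )

Either way, the selected string reaches MagicAnimate.__call__ as controlnet_model and decides whether the pipeline is rebuilt.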
Binary file added inputs/cai-xukun.mp4
16 changes: 16 additions & 0 deletions install.ps1
@@ -33,5 +33,21 @@ if ($install_SD15 -eq "y" -or $install_SD15 -eq "Y" -or $install_SD15 -eq ""){
}
}

$install_CNOP = Read-Host "Do you need to download control_v11p_sd15_openpose? If you want to use it select y, otherwise select n. [y/n] (Default is y)"
if ($install_CNOP -eq "y" -or $install_CNOP -eq "Y" -or $install_CNOP -eq ""){
if (!(Test-Path -Path "control_v11p_sd15_openpose")) {
Write-Output "Downloading control_v11p_sd15_openpose models..."
git clone https://huggingface.co/bdsqlsz/control_v11p_sd15_openpose
}
}

Write-Output "Installing Video_controlnet_aux..."

git submodule update --recursive --init

Set-Location $PSScriptRoot/video_controlnet_aux
pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
pip install -r requirements-video.txt -i https://mirror.baidu.com/pypi/simple

Write-Output "Install completed"
Read-Host | Out-Null ;
21 changes: 20 additions & 1 deletion install_cn.ps1
@@ -9,7 +9,7 @@ if (!(Test-Path -Path "venv")) {
.\venv\Scripts\activate

Write-Output "安装依赖..."
pip install -U -r requirements-windows.txt -i https://mirror.baidu.com/pypi/simple
#pip install -U -r requirements-windows.txt -i https://mirror.baidu.com/pypi/simple

Write-Output "检查模型..."

@@ -40,5 +40,24 @@ if ($install_SD15 -eq "y" -or $install_SD15 -eq "Y" -or $install_SD15 -eq "") {
}
}

$install_CNOP = Read-Host "是否需要下载huggingface的control_v11p_sd15_openpose模型? 若您希望使用openpose选择y,如果不需要选择 n。[y/n] (默认为 y)"
if ($install_CNOP -eq "y" -or $install_CNOP -eq "Y" -or $install_CNOP -eq ""){
if (!(Test-Path -Path "control_v11p_sd15_openpose")) {
Write-Output "下载 control_v11p_sd15_openpose 模型..."
git clone https://huggingface.co/bdsqlsz/control_v11p_sd15_openpose
}
if (Test-Path -Path "control_v11p_sd15_openpose/.git/lfs") {
Remove-Item -Path control_v11p_sd15_openpose/.git/lfs/* -Recurse -Force
}
}

Write-Output "安装Video_controlnet_aux..."

git submodule update --recursive --init

Set-Location $PSScriptRoot/video_controlnet_aux
pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
pip install -r requirements-video.txt -i https://mirror.baidu.com/pypi/simple

Write-Output "安装完毕"
Read-Host | Out-Null ;
7 changes: 7 additions & 0 deletions magicanimate/models/model_util.py
@@ -263,3 +263,10 @@ def create_noise_scheduler(
raise ValueError(f"Unknown scheduler name: {name}")

return scheduler

def torch_gc():
import gc; gc.collect()
if torch.cuda.is_available():
with torch.cuda.device("cuda"):
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
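
torch_gc is the helper demo/animate.py calls right after moving the old VAE, UNet, text encoder, ControlNet and appearance encoder to CPU, so their CUDA allocations are actually returned before the pipeline is rebuilt with a different ControlNet. A minimal sketch of that pattern; the unload helper is illustrative, not part of the commit:

    import torch
    from magicanimate.models.model_util import torch_gc

    def unload(*modules: torch.nn.Module) -> None:
        # Move weights off the GPU first, then collect Python garbage and
        # release cached CUDA blocks (empty_cache + ipc_collect).
        for m in modules:
            m.to("cpu")
        torch_gc()

    # e.g. unload(vae, unet, text_encoder, controlnet, appearance_encoder)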
4 changes: 2 additions & 2 deletions requirements-windows.txt
@@ -29,8 +29,8 @@ frozenlist==1.4.0
fsspec==2023.6.0
google-auth==2.22.0
google-auth-oauthlib==1.0.0
gradio==3.41.2
gradio-client==0.5.0
gradio
gradio-client
grpcio==1.57.0
h11==0.14.0
httpcore==0.17.3
22 changes: 22 additions & 0 deletions run_VidControlnetAux_gui.ps1
@@ -0,0 +1,22 @@
$input_path="./inputs/cai-xukun.mp4"
$output_path="./outputs/"


Set-Location $PSScriptRoot
.\venv\Scripts\activate

$Env:HF_HOME = "./huggingface"
$Env:XFORMERS_FORCE_DISABLE_TRITON = "1"
#$Env:PYTHONPATH = $PSScriptRoot
$ext_args = [System.Collections.ArrayList]::new()

if ($input_path) {
[void]$ext_args.Add("-i=$input_path")
}

if ($output_path) {
[void]$ext_args.Add("-o=$output_path")
}


python.exe "video_controlnet_aux/src/video_controlnet_aux.py" $ext_args

2 comments on commit 8657110

@FurkanGozukara commented on 8657110 Dec 9, 2023


Do you see any improvement with OpenPose over DensePose? I tried loading a custom model (e.g. Realistic Vision) and it made no difference.

@sdbds (Owner, Author) commented on 8657110 Dec 9, 2023

> Do you see any improvement with OpenPose over DensePose? I tried loading a custom model (e.g. Realistic Vision) and it made no difference.

I think the pipeline relies mostly on the reference image, so you can't see much of a difference.
