diff --git a/.gitignore b/.gitignore
index 0eb3e9b4..2bcbc3ac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,4 +12,5 @@ pretrained_models
 demo/tmp
 demo/outputs
 huggingface/
-venv/
\ No newline at end of file
+venv/
+outputs/
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000..2138a611
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "video_controlnet_aux"]
+	path = video_controlnet_aux
+	url = https://github.com/sdbds/video_controlnet_aux
diff --git a/configs/prompts/animation.yaml b/configs/prompts/animation.yaml
index 6aab720d..29d0a567 100644
--- a/configs/prompts/animation.yaml
+++ b/configs/prompts/animation.yaml
@@ -1,6 +1,7 @@
 pretrained_model_path: "pretrained_models/stable-diffusion-v1-5"
 pretrained_vae_path: ""
 pretrained_controlnet_path: "pretrained_models/MagicAnimate/densepose_controlnet"
+openpose_path: "pretrained_models/control_v11p_sd15_openpose"
 pretrained_appearance_encoder_path: "pretrained_models/MagicAnimate/appearance_encoder"
 pretrained_unet_path: ""
 
@@ -40,3 +41,5 @@ max_length: null
 video_type: "condition"
 invert_video: false
 save_individual_videos: false
+
+openpose: false
\ No newline at end of file
diff --git a/demo/animate.py b/demo/animate.py
index cb660411..54a45ad0 100644
--- a/demo/animate.py
+++ b/demo/animate.py
@@ -29,7 +29,7 @@
 from magicanimate.models.controlnet import ControlNetModel
 from magicanimate.models.appearance_encoder import AppearanceEncoderModel
 from magicanimate.models.mutual_self_attention import ReferenceAttentionControl
-from magicanimate.models.model_util import load_models
+from magicanimate.models.model_util import load_models, torch_gc
 from magicanimate.pipelines.pipeline_animation import AnimationPipeline
 from magicanimate.utils.util import save_videos_grid
 from accelerate.utils import set_seed
@@ -44,19 +44,23 @@
 from pathlib import Path
 
 
 class MagicAnimate:
-    def __init__(self, config="configs/prompts/animation.yaml") -> None:
+    def __init__(self, config="configs/prompts/animation.yaml",controlnet_model="densepose") -> None:
         print("Initializing MagicAnimate Pipeline...")
         *_, func_args = inspect.getargvalues(inspect.currentframe())
         func_args = dict(func_args)
 
+        self.config = config
+
         config = OmegaConf.load(config)
-        
+
         inference_config = OmegaConf.load(config.inference_config)
 
         motion_module = config.motion_module
+
+        self.controlnet_model = controlnet_model
 
         ### >>> create animation pipeline >>> ###
-        tokenizer, text_encoder, unet, noise_scheduler, vae = load_models(
+        self.tokenizer, self.text_encoder, self.unet, noise_scheduler, self.vae = load_models(
             config.pretrained_model_path,
             scheduler_name="",
             v2=False,
@@ -69,15 +73,15 @@ def __init__(self, config="configs/prompts/animation.yaml") -> None:
         #     config.pretrained_model_path, subfolder="text_encoder"
         # )
         if config.pretrained_unet_path:
-            unet = UNet3DConditionModel.from_pretrained_2d(
+            self.unet = UNet3DConditionModel.from_pretrained_2d(
                 config.pretrained_unet_path,
                 unet_additional_kwargs=OmegaConf.to_container(
                     inference_config.unet_additional_kwargs
                 ),
             )
         else:
-            unet = UNet3DConditionModel.from_pretrained_2d(
-                unet.config,
+            self.unet = UNet3DConditionModel.from_pretrained_2d(
+                self.unet.config,
                 subfolder=None,
                 unet_additional_kwargs=OmegaConf.to_container(
                     inference_config.unet_additional_kwargs
@@ -93,38 +97,44 @@ def __init__(self, config="configs/prompts/animation.yaml") -> None:
             fusion_blocks=config.fusion_blocks,
         )
         self.reference_control_reader = ReferenceAttentionControl(
-            unet,
+            self.unet,
             do_classifier_free_guidance=True,
             mode="read",
             fusion_blocks=config.fusion_blocks,
         )
 
         if config.pretrained_vae_path:
-            vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path)
+            self.vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path)
         # else:
         #     vae = AutoencoderKL.from_pretrained(
         #         config.pretrained_model_path, subfolder="vae"
         #     )
 
         ### Load controlnet
-        controlnet = ControlNetModel.from_pretrained(config.pretrained_controlnet_path)
+        if "openpose" in self.controlnet_model:
+            self.controlnet = ControlNetModel.from_pretrained(config.openpose_path)
+            print("Using OpenPose ControlNet")
+        else:
+            self.controlnet = ControlNetModel.from_pretrained(config.pretrained_controlnet_path)
+            print("Using Densepose ControlNet")
+
 
-        vae.to(torch.float16)
-        unet.to(torch.float16)
-        text_encoder.to(torch.float16)
-        controlnet.to(torch.float16)
+        self.vae.to(torch.float16)
+        self.unet.to(torch.float16)
+        self.text_encoder.to(torch.float16)
+        self.controlnet.to(torch.float16)
         self.appearance_encoder.to(torch.float16)
 
-        unet.enable_xformers_memory_efficient_attention()
+        self.unet.enable_xformers_memory_efficient_attention()
         self.appearance_encoder.enable_xformers_memory_efficient_attention()
-        controlnet.enable_xformers_memory_efficient_attention()
+        self.controlnet.enable_xformers_memory_efficient_attention()
 
         self.pipeline = AnimationPipeline(
-            vae=vae,
-            text_encoder=text_encoder,
-            tokenizer=tokenizer,
-            unet=unet,
-            controlnet=controlnet,
+            vae=self.vae,
+            text_encoder=self.text_encoder,
+            tokenizer=self.tokenizer,
+            unet=self.unet,
+            controlnet=self.controlnet,
             scheduler=DDIMScheduler(
                 **OmegaConf.to_container(inference_config.noise_scheduler_kwargs)
             ),
@@ -165,7 +175,7 @@ def __init__(self, config="configs/prompts/animation.yaml") -> None:
                 _tmp_[_key] = motion_module_state_dict[key]
             else:
                 _tmp_[key] = motion_module_state_dict[key]
-        missing, unexpected = unet.load_state_dict(_tmp_, strict=False)
+        missing, unexpected = self.unet.load_state_dict(_tmp_, strict=False)
         assert len(unexpected) == 0
         del _tmp_
         del motion_module_state_dict
@@ -175,9 +185,20 @@ def __init__(self, config="configs/prompts/animation.yaml") -> None:
 
         print("Initialization Done!")
 
+    def reset_init(instance, *args, **kwargs):
+        instance.__init__(*args, **kwargs)
+
     def __call__(
-        self, source_image, motion_sequence, random_seed, step, guidance_scale, size=512
+        self, source_image, motion_sequence, random_seed, step, guidance_scale, controlnet_model="densepose", size=512,
     ):
+        if self.controlnet_model != controlnet_model:
+            self.vae.to("cpu")
+            self.unet.to("cpu")
+            self.text_encoder.to("cpu")
+            self.controlnet.to("cpu")
+            self.appearance_encoder.to("cpu")
+            torch_gc()
+            self.reset_init(config="configs/prompts/animation.yaml", controlnet_model=controlnet_model)
         prompt = n_prompt = ""
         random_seed = int(random_seed)
         step = int(step)
@@ -252,3 +273,5 @@ def __call__(
 
         save_videos_grid(samples_per_video, animation_path)
         return animation_path
+
+
\ No newline at end of file
diff --git a/demo/gradio_animate.py b/demo/gradio_animate.py
index 9a932a28..4c825560 100644
--- a/demo/gradio_animate.py
+++ b/demo/gradio_animate.py
@@ -18,11 +18,26 @@
 animator = MagicAnimate()
 
 
-def animate(reference_image, motion_sequence_state, seed, steps, guidance_scale):
-    return animator(reference_image, motion_sequence_state, seed, steps, guidance_scale)
-with gr.Blocks() as demo:
+def animate(
+    reference_image,
+    motion_sequence_state,
+    seed,
+    steps,
+    guidance_scale,
+    controlnet_model,
+):
+    return animator(
+        reference_image,
+        motion_sequence_state,
+        seed,
+        steps,
+        guidance_scale,
+        controlnet_model,
+    )
+
+with gr.Blocks() as demo:
 
     gr.HTML(
         """
@@ -38,44 +53,55 @@ def animate(reference_image, motion_sequence_state, seed, steps, guidance_scale)
-        """)
+        """
+    )
     animation = gr.Video(format="mp4", label="Animation Results", autoplay=True)
-    
+
     with gr.Row():
-        reference_image = gr.Image(label="Reference Image")
-        motion_sequence = gr.Video(format="mp4", label="Motion Sequence")
-        
+        reference_image = gr.Image(label="Reference Image")
+        motion_sequence = gr.Video(format="mp4", label="Motion Sequence")
+
         with gr.Column():
-            random_seed = gr.Textbox(label="Random seed", value=1, info="default: -1")
-            sampling_steps = gr.Textbox(label="Sampling steps", value=25, info="default: 25")
-            guidance_scale = gr.Textbox(label="Guidance scale", value=7.5, info="default: 7.5")
-            submit = gr.Button("Animate")
+            random_seed = gr.Textbox(label="Random seed", value=1, info="default: -1")
+            sampling_steps = gr.Textbox(
+                label="Sampling steps", value=25, info="default: 25"
+            )
+            guidance_scale = gr.Textbox(
+                label="Guidance scale", value=7.5, info="default: 7.5"
+            )
+            submit = gr.Button("Animate")
 
     def read_video(video):
         reader = imageio.get_reader(video)
-        fps = reader.get_meta_data()['fps']
+        fps = reader.get_meta_data()["fps"]
         return video
-    
+
     def read_image(image, size=512):
         return np.array(Image.fromarray(image).resize((size, size)))
-    
+
     # when user uploads a new video
-    motion_sequence.upload(
-        read_video,
-        motion_sequence,
-        motion_sequence
-    )
+    motion_sequence.upload(read_video, motion_sequence, motion_sequence)
     # when `first_frame` is updated
-    reference_image.upload(
-        read_image,
-        reference_image,
-        reference_image
-    )
+    reference_image.upload(read_image, reference_image, reference_image)
     # when the `submit` button is clicked
     submit.click(
         animate,
-        [reference_image, motion_sequence, random_seed, sampling_steps, guidance_scale],
-        animation
+        [
+            reference_image,
+            motion_sequence,
+            random_seed,
+            sampling_steps,
+            guidance_scale,
+            gr.Radio(
+                [
+                    "densepose",
+                    "openpose",  # "animalpose"
+                ],
+                label="Controlnet Model",
+                value="densepose",
+            ),
+        ],
+        animation,
     )
 
     # Examples
@@ -93,4 +119,4 @@ def read_image(image, size=512):
     )
 
 
-demo.launch(share=True)
\ No newline at end of file
+demo.launch(share=True)
diff --git a/inputs/cai-xukun.mp4 b/inputs/cai-xukun.mp4
new file mode 100644
index 00000000..88516758
Binary files /dev/null and b/inputs/cai-xukun.mp4 differ
diff --git a/install.ps1 b/install.ps1
index d180a6bb..44f576e7 100644
--- a/install.ps1
+++ b/install.ps1
@@ -33,5 +33,21 @@ if ($install_SD15 -eq "y" -or $install_SD15 -eq "Y" -or $install_SD15 -eq ""){
     }
 }
 
+$install_CNOP = Read-Host "Do you need to download control_v11p_sd15_openpose? If you want use it select y, if you dont want select n. [y/n] (Default is y)"
+if ($install_CNOP -eq "y" -or $install_CNOP -eq "Y" -or $install_CNOP -eq ""){
+    if (!(Test-Path -Path "control_v11p_sd15_openpose")) {
+        Write-Output "Downloading control_v11p_sd15_openpose models..."
+        git clone https://huggingface.co/bdsqlsz/control_v11p_sd15_openpose
+    }
+}
+
+Write-Output "Installing Video_controlnet_aux..."
+
+git submodule update --recursive --init
+
+Set-Location $PSScriptRoot/video_controlnet_aux
+pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
+pip install -r requirements-video.txt -i https://mirror.baidu.com/pypi/simple
+
 Write-Output "Install completed"
 Read-Host | Out-Null ;
diff --git a/install_cn.ps1 b/install_cn.ps1
index 2b16903a..cb7c3b0f 100644
--- a/install_cn.ps1
+++ b/install_cn.ps1
@@ -9,7 +9,7 @@ if (!(Test-Path -Path "venv")) {
 .\venv\Scripts\activate
 
 Write-Output "安装依赖..."
-pip install -U -r requirements-windows.txt -i https://mirror.baidu.com/pypi/simple
+#pip install -U -r requirements-windows.txt -i https://mirror.baidu.com/pypi/simple
 
 Write-Output "检查模型..."
 
@@ -40,5 +40,24 @@ if ($install_SD15 -eq "y" -or $install_SD15 -eq "Y" -or $install_SD15 -eq "") {
     }
 }
 
+$install_CNOP = Read-Host "是否需要下载huggingface的control_v11p_sd15_openpose模型? 若您希望使用openpose选择y,如果不需要选择 n。[y/n] (默认为 y)"
+if ($install_CNOP -eq "y" -or $install_CNOP -eq "Y" -or $install_CNOP -eq ""){
+    if (!(Test-Path -Path "control_v11p_sd15_openpose")) {
+        Write-Output "下载 control_v11p_sd15_openpose 模型..."
+        git clone https://huggingface.co/bdsqlsz/control_v11p_sd15_openpose
+    }
+    if (Test-Path -Path "control_v11p_sd15_openpose/.git/lfs") {
+        Remove-Item -Path control_v11p_sd15_openpose/.git/lfs/* -Recurse -Force
+    }
+}
+
+Write-Output "安装Video_controlnet_aux..."
+
+git submodule update --recursive --init
+
+Set-Location $PSScriptRoot/video_controlnet_aux
+pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
+pip install -r requirements-video.txt -i https://mirror.baidu.com/pypi/simple
+
 Write-Output "安装完毕"
 Read-Host | Out-Null ;
diff --git a/magicanimate/models/model_util.py b/magicanimate/models/model_util.py
index eed89c28..af1649ce 100644
--- a/magicanimate/models/model_util.py
+++ b/magicanimate/models/model_util.py
@@ -263,3 +263,10 @@ def create_noise_scheduler(
         raise ValueError(f"Unknown scheduler name: {name}")
 
     return scheduler
+
+def torch_gc():
+    import gc; gc.collect()
+    if torch.cuda.is_available():
+        with torch.cuda.device("cuda"):
+            torch.cuda.empty_cache()
+            torch.cuda.ipc_collect()
\ No newline at end of file
diff --git a/requirements-windows.txt b/requirements-windows.txt
index fb64b77e..e08aeb9d 100644
--- a/requirements-windows.txt
+++ b/requirements-windows.txt
@@ -29,8 +29,8 @@ frozenlist==1.4.0
 fsspec==2023.6.0
 google-auth==2.22.0
 google-auth-oauthlib==1.0.0
-gradio==3.41.2
-gradio-client==0.5.0
+gradio
+gradio-client
 grpcio==1.57.0
 h11==0.14.0
 httpcore==0.17.3
diff --git a/run_VidControlnetAux_gui.ps1 b/run_VidControlnetAux_gui.ps1
new file mode 100644
index 00000000..6daaaa7d
--- /dev/null
+++ b/run_VidControlnetAux_gui.ps1
@@ -0,0 +1,22 @@
+$input_path="./inputs/cai-xukun.mp4"
+$output_path="./outputs/"
+
+
+Set-Location $PSScriptRoot
+.\venv\Scripts\activate
+
+$Env:HF_HOME = "./huggingface"
+$Env:XFORMERS_FORCE_DISABLE_TRITON = "1"
+#$Env:PYTHONPATH = $PSScriptRoot
+$ext_args = [System.Collections.ArrayList]::new()
+
+if ($input_path) {
+    [void]$ext_args.Add("-i=$input_path")
+}
+
+if ($output_path) {
+    [void]$ext_args.Add("-o=$output_path")
+}
+
+
+python.exe "video_controlnet_aux/src/video_controlnet_aux.py" $ext_args
diff --git a/video_controlnet_aux b/video_controlnet_aux
new file mode 160000
index 00000000..98e3c33a
--- /dev/null
+++ b/video_controlnet_aux
@@ -0,0 +1 @@
+Subproject commit 98e3c33a53e6a5360e73e6af06d0bced33203f0d