
Commit

Merge branch 'video2openpose2' of https://github.com/sdbds/magic-anim…
sdbds committed Dec 9, 2023
2 parents d61200a + e1416b5 commit 8657110
Showing 12 changed files with 176 additions and 55 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -12,4 +12,5 @@ pretrained_models
demo/tmp
demo/outputs
huggingface/
venv/
venv/
outputs/
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "video_controlnet_aux"]
path = video_controlnet_aux
url = https://github.com/sdbds/video_controlnet_aux
3 changes: 3 additions & 0 deletions configs/prompts/animation.yaml
@@ -1,6 +1,7 @@
pretrained_model_path: "pretrained_models/stable-diffusion-v1-5"
pretrained_vae_path: ""
pretrained_controlnet_path: "pretrained_models/MagicAnimate/densepose_controlnet"
openpose_path: "pretrained_models/control_v11p_sd15_openpose"
pretrained_appearance_encoder_path: "pretrained_models/MagicAnimate/appearance_encoder"
pretrained_unet_path: ""

@@ -40,3 +41,5 @@ max_length: null
video_type: "condition"
invert_video: false
save_individual_videos: false

openpose: false
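
The new openpose_path key is consumed in demo/animate.py (diff below): when the OpenPose model is requested, the ControlNet weights are loaded from this path instead of the DensePose checkpoint. A minimal sketch of that lookup, mirroring the code further down; the standalone snippet and the hard-coded controlnet_model value are illustrative, not part of the commit:

    from omegaconf import OmegaConf
    from magicanimate.models.controlnet import ControlNetModel

    config = OmegaConf.load("configs/prompts/animation.yaml")
    controlnet_model = "openpose"  # normally chosen in the Gradio UI

    # Same branch as MagicAnimate.__init__: pick the checkpoint by model name.
    if "openpose" in controlnet_model:
        controlnet = ControlNetModel.from_pretrained(config.openpose_path)
    else:
        controlnet = ControlNetModel.from_pretrained(config.pretrained_controlnet_path)
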
69 changes: 46 additions & 23 deletions demo/animate.py
@@ -29,7 +29,7 @@
from magicanimate.models.controlnet import ControlNetModel
from magicanimate.models.appearance_encoder import AppearanceEncoderModel
from magicanimate.models.mutual_self_attention import ReferenceAttentionControl
from magicanimate.models.model_util import load_models
from magicanimate.models.model_util import load_models, torch_gc
from magicanimate.pipelines.pipeline_animation import AnimationPipeline
from magicanimate.utils.util import save_videos_grid
from accelerate.utils import set_seed
@@ -44,19 +44,23 @@
from pathlib import Path

class MagicAnimate:
def __init__(self, config="configs/prompts/animation.yaml") -> None:
def __init__(self, config="configs/prompts/animation.yaml",controlnet_model="densepose") -> None:
print("Initializing MagicAnimate Pipeline...")
*_, func_args = inspect.getargvalues(inspect.currentframe())
func_args = dict(func_args)

self.config = config

config = OmegaConf.load(config)

inference_config = OmegaConf.load(config.inference_config)

motion_module = config.motion_module

self.controlnet_model = controlnet_model

### >>> create animation pipeline >>> ###
tokenizer, text_encoder, unet, noise_scheduler, vae = load_models(
self.tokenizer, self.text_encoder, self.unet, noise_scheduler, self.vae = load_models(
config.pretrained_model_path,
scheduler_name="",
v2=False,
@@ -69,15 +73,15 @@ def __init__(self, config="configs/prompts/animation.yaml") -> None:
# config.pretrained_model_path, subfolder="text_encoder"
# )
if config.pretrained_unet_path:
unet = UNet3DConditionModel.from_pretrained_2d(
self.unet = UNet3DConditionModel.from_pretrained_2d(
config.pretrained_unet_path,
unet_additional_kwargs=OmegaConf.to_container(
inference_config.unet_additional_kwargs
),
)
else:
unet = UNet3DConditionModel.from_pretrained_2d(
unet.config,
self.unet = UNet3DConditionModel.from_pretrained_2d(
self.unet.config,
subfolder=None,
unet_additional_kwargs=OmegaConf.to_container(
inference_config.unet_additional_kwargs
@@ -93,38 +97,44 @@ def __init__(self, config="configs/prompts/animation.yaml") -> None:
fusion_blocks=config.fusion_blocks,
)
self.reference_control_reader = ReferenceAttentionControl(
unet,
self.unet,
do_classifier_free_guidance=True,
mode="read",
fusion_blocks=config.fusion_blocks,
)

if config.pretrained_vae_path:
vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path)
self.vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path)
# else:
# vae = AutoencoderKL.from_pretrained(
# config.pretrained_model_path, subfolder="vae"
# )

### Load controlnet
controlnet = ControlNetModel.from_pretrained(config.pretrained_controlnet_path)
if "openpose" in self.controlnet_model:
self.controlnet = ControlNetModel.from_pretrained(config.openpose_path)
print("Using OpenPose ControlNet")
else:
self.controlnet = ControlNetModel.from_pretrained(config.pretrained_controlnet_path)
print("Using Densepose ControlNet")


vae.to(torch.float16)
unet.to(torch.float16)
text_encoder.to(torch.float16)
controlnet.to(torch.float16)
self.vae.to(torch.float16)
self.unet.to(torch.float16)
self.text_encoder.to(torch.float16)
self.controlnet.to(torch.float16)
self.appearance_encoder.to(torch.float16)

unet.enable_xformers_memory_efficient_attention()
self.unet.enable_xformers_memory_efficient_attention()
self.appearance_encoder.enable_xformers_memory_efficient_attention()
controlnet.enable_xformers_memory_efficient_attention()
self.controlnet.enable_xformers_memory_efficient_attention()

self.pipeline = AnimationPipeline(
vae=vae,
text_encoder=text_encoder,
tokenizer=tokenizer,
unet=unet,
controlnet=controlnet,
vae=self.vae,
text_encoder=self.text_encoder,
tokenizer=self.tokenizer,
unet=self.unet,
controlnet=self.controlnet,
scheduler=DDIMScheduler(
**OmegaConf.to_container(inference_config.noise_scheduler_kwargs)
),
@@ -165,7 +175,7 @@ def __init__(self, config="configs/prompts/animation.yaml") -> None:
_tmp_[_key] = motion_module_state_dict[key]
else:
_tmp_[key] = motion_module_state_dict[key]
missing, unexpected = unet.load_state_dict(_tmp_, strict=False)
missing, unexpected = self.unet.load_state_dict(_tmp_, strict=False)
assert len(unexpected) == 0
del _tmp_
del motion_module_state_dict
@@ -175,9 +185,20 @@ def __init__(self, config="configs/prompts/animation.yaml") -> None:

print("Initialization Done!")

def reset_init(instance, *args, **kwargs):
instance.__init__(*args, **kwargs)

def __call__(
self, source_image, motion_sequence, random_seed, step, guidance_scale, size=512
self, source_image, motion_sequence, random_seed, step, guidance_scale, controlnet_model="densepose", size=512,
):
if self.controlnet_model != controlnet_model:
self.vae.to("cpu")
self.unet.to("cpu")
self.text_encoder.to("cpu")
self.controlnet.to("cpu")
self.appearance_encoder.to("cpu")
torch_gc()
self.reset_init(config="configs/prompts/animation.yaml", controlnet_model=controlnet_model)
prompt = n_prompt = ""
random_seed = int(random_seed)
step = int(step)
@@ -252,3 +273,5 @@ def __call__(
save_videos_grid(samples_per_video, animation_path)

return animation_path
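
Taken together, the animate.py changes let the demo switch ControlNets without restarting: __call__ now takes a controlnet_model argument, and if it differs from the model the pipeline was built with, the loaded weights are moved to CPU, torch_gc() releases the VRAM, and reset_init rebuilds the pipeline with the other checkpoint. A minimal usage sketch, assuming the repository root is on PYTHONPATH; the reference image and the conditioning video paths are illustrative (the latter would be produced by run_VidControlnetAux_gui.ps1):

    import numpy as np
    from PIL import Image
    from demo.animate import MagicAnimate

    # Build the pipeline with the default DensePose ControlNet.
    animator = MagicAnimate(config="configs/prompts/animation.yaml",
                            controlnet_model="densepose")

    # The Gradio demo passes the reference image as a 512x512 array.
    source = np.array(Image.open("reference.png").resize((512, 512)))

    # First call: DensePose conditioning, nothing is reloaded.
    path = animator(source, "outputs/densepose.mp4", 1, 25, 7.5,
                    controlnet_model="densepose")

    # Different model name: the current weights go to CPU, torch_gc() frees
    # VRAM, and __init__ runs again with the OpenPose ControlNet from the
    # new openpose_path config key before animating.
    path = animator(source, "outputs/openpose.mp4", 1, 25, 7.5,
                    controlnet_model="openpose")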


82 changes: 54 additions & 28 deletions demo/gradio_animate.py
@@ -18,11 +18,26 @@

animator = MagicAnimate()

def animate(reference_image, motion_sequence_state, seed, steps, guidance_scale):
return animator(reference_image, motion_sequence_state, seed, steps, guidance_scale)

with gr.Blocks() as demo:
def animate(
reference_image,
motion_sequence_state,
seed,
steps,
guidance_scale,
controlnet_model,
):
return animator(
reference_image,
motion_sequence_state,
seed,
steps,
guidance_scale,
controlnet_model,
)


with gr.Blocks() as demo:
gr.HTML(
"""
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
@@ -38,44 +53,55 @@ def animate(reference_image, motion_sequence_state, seed, steps, guidance_scale)
</div>
</div>
</div>
""")
"""
)
animation = gr.Video(format="mp4", label="Animation Results", autoplay=True)

with gr.Row():
reference_image = gr.Image(label="Reference Image")
motion_sequence = gr.Video(format="mp4", label="Motion Sequence")
reference_image = gr.Image(label="Reference Image")
motion_sequence = gr.Video(format="mp4", label="Motion Sequence")

with gr.Column():
random_seed = gr.Textbox(label="Random seed", value=1, info="default: -1")
sampling_steps = gr.Textbox(label="Sampling steps", value=25, info="default: 25")
guidance_scale = gr.Textbox(label="Guidance scale", value=7.5, info="default: 7.5")
submit = gr.Button("Animate")
random_seed = gr.Textbox(label="Random seed", value=1, info="default: -1")
sampling_steps = gr.Textbox(
label="Sampling steps", value=25, info="default: 25"
)
guidance_scale = gr.Textbox(
label="Guidance scale", value=7.5, info="default: 7.5"
)
submit = gr.Button("Animate")

def read_video(video):
reader = imageio.get_reader(video)
fps = reader.get_meta_data()['fps']
fps = reader.get_meta_data()["fps"]
return video

def read_image(image, size=512):
return np.array(Image.fromarray(image).resize((size, size)))

# when user uploads a new video
motion_sequence.upload(
read_video,
motion_sequence,
motion_sequence
)
motion_sequence.upload(read_video, motion_sequence, motion_sequence)
# when `first_frame` is updated
reference_image.upload(
read_image,
reference_image,
reference_image
)
reference_image.upload(read_image, reference_image, reference_image)
# when the `submit` button is clicked
submit.click(
animate,
[reference_image, motion_sequence, random_seed, sampling_steps, guidance_scale],
animation
[
reference_image,
motion_sequence,
random_seed,
sampling_steps,
guidance_scale,
gr.Radio(
[
"densepose",
"openpose", # "animalpose"
],
label="Controlnet Model",
value="densepose",
),
],
animation,
)

# Examples
@@ -93,4 +119,4 @@ def read_image(image, size=512):
)


demo.launch(share=True)
demo.launch(share=True)
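
In the new click wiring, the ControlNet selector is a gr.Radio created inline inside the inputs list of submit.click. Below is a sketch of an alternative arrangement (not what this commit does) that declares the selector with the other controls so it renders in the settings column; it assumes the components defined earlier in the same Blocks context:

    with gr.Column():
        random_seed = gr.Textbox(label="Random seed", value=1, info="default: -1")
        sampling_steps = gr.Textbox(label="Sampling steps", value=25, info="default: 25")
        guidance_scale = gr.Textbox(label="Guidance scale", value=7.5, info="default: 7.5")
        controlnet_model = gr.Radio(["densepose", "openpose"],
                                    label="Controlnet Model", value="densepose")
        submit = gr.Button("Animate")

    submit.click(
        animate,
        [reference_image, motion_sequence, random_seed, sampling_steps,
         guidance_scale, controlnet_model],
        animation,
    )

Either way, the selected string reaches MagicAnimate.__call__ as controlnet_model and decides whether the pipeline is rebuilt.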
Binary file added inputs/cai-xukun.mp4
16 changes: 16 additions & 0 deletions install.ps1
@@ -33,5 +33,21 @@ if ($install_SD15 -eq "y" -or $install_SD15 -eq "Y" -or $install_SD15 -eq ""){
}
}

$install_CNOP = Read-Host "Do you need to download control_v11p_sd15_openpose? If you want to use it select y, otherwise select n. [y/n] (Default is y)"
if ($install_CNOP -eq "y" -or $install_CNOP -eq "Y" -or $install_CNOP -eq ""){
if (!(Test-Path -Path "control_v11p_sd15_openpose")) {
Write-Output "Downloading control_v11p_sd15_openpose models..."
git clone https://huggingface.co/bdsqlsz/control_v11p_sd15_openpose
}
}

Write-Output "Installing Video_controlnet_aux..."

git submodule update --recursive --init

Set-Location $PSScriptRoot/video_controlnet_aux
pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
pip install -r requirements-video.txt -i https://mirror.baidu.com/pypi/simple

Write-Output "Install completed"
Read-Host | Out-Null ;
21 changes: 20 additions & 1 deletion install_cn.ps1
@@ -9,7 +9,7 @@ if (!(Test-Path -Path "venv")) {
.\venv\Scripts\activate

Write-Output "安装依赖..."
pip install -U -r requirements-windows.txt -i https://mirror.baidu.com/pypi/simple
#pip install -U -r requirements-windows.txt -i https://mirror.baidu.com/pypi/simple

Write-Output "检查模型..."

@@ -40,5 +40,24 @@ if ($install_SD15 -eq "y" -or $install_SD15 -eq "Y" -or $install_SD15 -eq "") {
}
}

$install_CNOP = Read-Host "是否需要下载huggingface的control_v11p_sd15_openpose模型? 若您希望使用openpose选择y,如果不需要选择 n。[y/n] (默认为 y)"
if ($install_CNOP -eq "y" -or $install_CNOP -eq "Y" -or $install_CNOP -eq ""){
if (!(Test-Path -Path "control_v11p_sd15_openpose")) {
Write-Output "下载 control_v11p_sd15_openpose 模型..."
git clone https://huggingface.co/bdsqlsz/control_v11p_sd15_openpose
}
if (Test-Path -Path "control_v11p_sd15_openpose/.git/lfs") {
Remove-Item -Path control_v11p_sd15_openpose/.git/lfs/* -Recurse -Force
}
}

Write-Output "安装Video_controlnet_aux..."

git submodule update --recursive --init

Set-Location $PSScriptRoot/video_controlnet_aux
pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
pip install -r requirements-video.txt -i https://mirror.baidu.com/pypi/simple

Write-Output "安装完毕"
Read-Host | Out-Null ;
7 changes: 7 additions & 0 deletions magicanimate/models/model_util.py
@@ -263,3 +263,10 @@ def create_noise_scheduler(
raise ValueError(f"Unknown scheduler name: {name}")

return scheduler

def torch_gc():
import gc; gc.collect()
if torch.cuda.is_available():
with torch.cuda.device("cuda"):
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
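
torch_gc is the helper demo/animate.py calls right after moving the old VAE, UNet, text encoder, ControlNet and appearance encoder to CPU, so their CUDA allocations are actually returned before the pipeline is rebuilt with a different ControlNet. A minimal sketch of that pattern; the unload helper is illustrative, not part of the commit:

    import torch
    from magicanimate.models.model_util import torch_gc

    def unload(*modules: torch.nn.Module) -> None:
        # Move weights off the GPU first, then collect Python garbage and
        # release cached CUDA blocks (empty_cache + ipc_collect).
        for m in modules:
            m.to("cpu")
        torch_gc()

    # e.g. unload(vae, unet, text_encoder, controlnet, appearance_encoder)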
4 changes: 2 additions & 2 deletions requirements-windows.txt
@@ -29,8 +29,8 @@ frozenlist==1.4.0
fsspec==2023.6.0
google-auth==2.22.0
google-auth-oauthlib==1.0.0
gradio==3.41.2
gradio-client==0.5.0
gradio
gradio-client
grpcio==1.57.0
h11==0.14.0
httpcore==0.17.3
22 changes: 22 additions & 0 deletions run_VidControlnetAux_gui.ps1
@@ -0,0 +1,22 @@
$input_path="./inputs/cai-xukun.mp4"
$output_path="./outputs/"


Set-Location $PSScriptRoot
.\venv\Scripts\activate

$Env:HF_HOME = "./huggingface"
$Env:XFORMERS_FORCE_DISABLE_TRITON = "1"
#$Env:PYTHONPATH = $PSScriptRoot
$ext_args = [System.Collections.ArrayList]::new()

if ($input_path) {
[void]$ext_args.Add("-i=$input_path")
}

if ($output_path) {
[void]$ext_args.Add("-o=$output_path")
}


python.exe "video_controlnet_aux/src/video_controlnet_aux.py" $ext_args

2 comments on commit 8657110

@FurkanGozukara commented on 8657110 Dec 9, 2023


Do you see any improvement with OpenPose over DensePose? I tried loading a custom model (e.g. Realistic Vision) and it made no difference.

@sdbds (Owner, Author) commented on 8657110 Dec 9, 2023

> Do you see any improvement with OpenPose over DensePose? I tried loading a custom model (e.g. Realistic Vision) and it made no difference.

I think the pipeline relies mostly on the reference image, so you can't see much of a difference.
