From 3c2eaf48db6902ae2135853f828359bb823f6e44 Mon Sep 17 00:00:00 2001 From: FlyingFathead Date: Sat, 7 Sep 2024 23:11:12 +0300 Subject: [PATCH 1/4] Fixed low VRAM mode and CPU option, added image check --- scripts/demo/streamlit_helpers.py | 52 +++++++++++++++++-------------- scripts/demo/video_sampling.py | 6 ++++ 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/scripts/demo/streamlit_helpers.py b/scripts/demo/streamlit_helpers.py index e79fc193..759cc94e 100644 --- a/scripts/demo/streamlit_helpers.py +++ b/scripts/demo/streamlit_helpers.py @@ -40,6 +40,12 @@ from torchvision import transforms from torchvision.utils import make_grid, save_image +# Additional options for lower end setups +USE_CUDA = True # Set this to `False`` if you want to force CPU-only mode +lowvram_mode = False # Set to `True` to enable low VRAM mode +# (low VRAM mode = float32 => float16, tested to work great on RTX 3060 w/ 12GB VRAM) + +device = torch.device("cuda" if USE_CUDA and torch.cuda.is_available() else "cpu") @st.cache_resource() def init_st(version_dict, load_ckpt=True, load_filter=True): @@ -59,35 +65,29 @@ def init_st(version_dict, load_ckpt=True, load_filter=True): state["filter"] = DeepFloydDataFiltering(verbose=False) return state - def load_model(model): - model.cuda() - - -lowvram_mode = False - + device = torch.device("cuda" if USE_CUDA and torch.cuda.is_available() else "cpu") + model.to(device) def set_lowvram_mode(mode): global lowvram_mode lowvram_mode = mode - def initial_model_load(model): + device = torch.device("cuda" if USE_CUDA and torch.cuda.is_available() else "cpu") global lowvram_mode if lowvram_mode: - model.model.half() + model.model.half().to(device) else: - model.cuda() + model.to(device) return model - def unload_model(model): global lowvram_mode - if lowvram_mode: - model.cpu() + if lowvram_mode or not USE_CUDA: + model.cpu() # Move model to CPU to free GPU memory torch.cuda.empty_cache() - def load_model_from_config(config, ckpt=None, verbose=True): model = instantiate_from_config(config.model) @@ -497,13 +497,14 @@ def load_img( st.text(f"input min/max/mean: {img.min():.3f}/{img.max():.3f}/{img.mean():.3f}") return img - def get_init_img(batch_size=1, key=None): - init_image = load_img(key=key).cuda() + device = torch.device("cuda" if USE_CUDA and torch.cuda.is_available() else "cpu") + + init_image = load_img(key=key).to(device) # Use `to(device)` to move to the correct device init_image = repeat(init_image, "1 ... -> b ...", b=batch_size) + return init_image - def do_sample( model, sampler, @@ -529,9 +530,9 @@ def do_sample( st.text("Sampling") outputs = st.empty() - precision_scope = autocast + precision_scope = autocast if USE_CUDA else lambda device: device with torch.no_grad(): - with precision_scope("cuda"): + with precision_scope("cuda" if USE_CUDA else "cpu"): with model.ema_scope(): if T is not None: num_samples = [num_samples, T] @@ -754,7 +755,7 @@ def do_img2img( outputs = st.empty() precision_scope = autocast with torch.no_grad(): - with precision_scope("cuda"): + with precision_scope("cuda" if USE_CUDA else "cpu"): with model.ema_scope(): load_model(model.conditioner) batch, batch_uc = get_batch( @@ -783,20 +784,25 @@ def do_img2img( noise = torch.randn_like(z) - sigmas = sampler.discretization(sampler.num_steps).cuda() + # Move sigmas to the correct device (CUDA or CPU) + sigmas = sampler.discretization(sampler.num_steps).to(device) sigma = sigmas[0] st.info(f"all sigmas: {sigmas}") st.info(f"noising sigma: {sigma}") + + # Offset noise level handling if offset_noise_level > 0.0: noise = noise + offset_noise_level * append_dims( - torch.randn(z.shape[0], device=z.device), z.ndim + torch.randn(z.shape[0], device=device), z.ndim ) + + # Add noise handling if add_noise: - noised_z = z + noise * append_dims(sigma, z.ndim).cuda() + noised_z = z + noise * append_dims(sigma, z.ndim).to(device) noised_z = noised_z / torch.sqrt( 1.0 + sigmas[0] ** 2.0 - ) # Note: hardcoded to DDPM-like scaling. need to generalize later. + ) # Hardcoded to DDPM-like scaling; generalize if needed else: noised_z = z / torch.sqrt(1.0 + sigmas[0] ** 2.0) diff --git a/scripts/demo/video_sampling.py b/scripts/demo/video_sampling.py index 1f4fcfc4..786baa11 100644 --- a/scripts/demo/video_sampling.py +++ b/scripts/demo/video_sampling.py @@ -180,6 +180,12 @@ if mode == "img2vid": img = load_img_for_prediction(W, H) + + # Check if the image is None and use a dummy image if necessary + if img is None: + st.warning("No image provided. Using a dummy tensor for initialization.") + img = torch.zeros([1, 3, H, W]).to(device) # Dummy tensor + if "sv3d" in version: cond_aug = 1e-5 else: From 7d2cc5b6baa5072dc893c1963f74133af497f2ea Mon Sep 17 00:00:00 2001 From: FlyingFathead Date: Sun, 8 Sep 2024 00:20:00 +0300 Subject: [PATCH 2/4] Fix MP4 video output with FFmpeg integration in `save_video_as_grid_and_mp4` --- requirements/pt2.txt | 1 + scripts/demo/streamlit_helpers.py | 35 +++++++++++++++++++------------ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/requirements/pt2.txt b/requirements/pt2.txt index 824473ab..0a77ad05 100644 --- a/requirements/pt2.txt +++ b/requirements/pt2.txt @@ -40,3 +40,4 @@ wheel>=0.41.0 xformers>=0.0.20 gradio streamlit-keyup==0.2.0 +imageio[ffmpeg]==2.26.1 diff --git a/scripts/demo/streamlit_helpers.py b/scripts/demo/streamlit_helpers.py index 759cc94e..d5fb0ac4 100644 --- a/scripts/demo/streamlit_helpers.py +++ b/scripts/demo/streamlit_helpers.py @@ -899,29 +899,38 @@ def load_img_for_prediction( st.image(pil_image) return image.to(device) * 2.0 - 1.0 - def save_video_as_grid_and_mp4( video_batch: torch.Tensor, save_path: str, T: int, fps: int = 5 ): + # Check if ffmpeg is available at the system level + if not shutil.which("ffmpeg"): + raise RuntimeError("System-level FFmpeg not found. Please install it and ensure it's in your PATH.") + + # Check if the imageio-ffmpeg package is installed and functioning + if imageio.plugins.ffmpeg.get_ffmpeg_version() is None: + raise RuntimeError("The imageio-ffmpeg package is not correctly installed. Use 'pip install imageio[ffmpeg]' to install it.") + os.makedirs(save_path, exist_ok=True) base_count = len(glob(os.path.join(save_path, "*.mp4"))) video_batch = rearrange(video_batch, "(b t) c h w -> b t c h w", t=T) video_batch = embed_watermark(video_batch) + for vid in video_batch: save_image(vid, fp=os.path.join(save_path, f"{base_count:06d}.png"), nrow=4) video_path = os.path.join(save_path, f"{base_count:06d}.mp4") - vid = ( - (rearrange(vid, "t c h w -> t h w c") * 255).cpu().numpy().astype(np.uint8) - ) - imageio.mimwrite(video_path, vid, fps=fps) - - video_path_h264 = video_path[:-4] + "_h264.mp4" - os.system(f"ffmpeg -i '{video_path}' -c:v libx264 '{video_path_h264}'") - with open(video_path_h264, "rb") as f: - video_bytes = f.read() - os.remove(video_path_h264) - st.video(video_bytes) - + vid = (rearrange(vid, "t c h w -> t h w c") * 255).cpu().numpy().astype(np.uint8) + + # Use the correct writer for MP4 format + writer = imageio.get_writer(video_path, fps=fps, format='ffmpeg', codec='libx264') + for frame in vid: + writer.append_data(frame) + writer.close() + + # Confirm that the file was created + if os.path.exists(video_path): + print(f"Video saved successfully at: {video_path}") + base_count += 1 + \ No newline at end of file From 2397c78cb7274fdfff02d415cbf200f1eedc140c Mon Sep 17 00:00:00 2001 From: FlyingFathead Date: Sun, 8 Sep 2024 00:56:49 +0300 Subject: [PATCH 3/4] Fix bug in MP4 video output with FFmpeg integration --- scripts/demo/streamlit_helpers.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/scripts/demo/streamlit_helpers.py b/scripts/demo/streamlit_helpers.py index d5fb0ac4..e1fd64fb 100644 --- a/scripts/demo/streamlit_helpers.py +++ b/scripts/demo/streamlit_helpers.py @@ -7,6 +7,7 @@ import cv2 import imageio import numpy as np +import shutil import streamlit as st import torch import torch.nn as nn @@ -902,14 +903,15 @@ def load_img_for_prediction( def save_video_as_grid_and_mp4( video_batch: torch.Tensor, save_path: str, T: int, fps: int = 5 ): - # Check if ffmpeg is available at the system level + # Check if FFmpeg is available + try: + import imageio_ffmpeg + except ImportError: + raise RuntimeError("FFmpeg support is not installed. Use 'pip install imageio[ffmpeg]' to install it.") + if not shutil.which("ffmpeg"): raise RuntimeError("System-level FFmpeg not found. Please install it and ensure it's in your PATH.") - # Check if the imageio-ffmpeg package is installed and functioning - if imageio.plugins.ffmpeg.get_ffmpeg_version() is None: - raise RuntimeError("The imageio-ffmpeg package is not correctly installed. Use 'pip install imageio[ffmpeg]' to install it.") - os.makedirs(save_path, exist_ok=True) base_count = len(glob(os.path.join(save_path, "*.mp4"))) From 45749bf1a5acd2d54ae6149c1715db98deb2f8c7 Mon Sep 17 00:00:00 2001 From: FlyingFathead Date: Sun, 13 Oct 2024 20:28:58 +0300 Subject: [PATCH 4/4] Refactor device variable usage and specify imageio[ffmpeg] version --- scripts/demo/streamlit_helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/demo/streamlit_helpers.py b/scripts/demo/streamlit_helpers.py index e1fd64fb..47fa2fc6 100644 --- a/scripts/demo/streamlit_helpers.py +++ b/scripts/demo/streamlit_helpers.py @@ -67,7 +67,7 @@ def init_st(version_dict, load_ckpt=True, load_filter=True): return state def load_model(model): - device = torch.device("cuda" if USE_CUDA and torch.cuda.is_available() else "cpu") + global device # Use the global device variable model.to(device) def set_lowvram_mode(mode): @@ -75,7 +75,7 @@ def set_lowvram_mode(mode): lowvram_mode = mode def initial_model_load(model): - device = torch.device("cuda" if USE_CUDA and torch.cuda.is_available() else "cpu") + global device # Use the global device variable global lowvram_mode if lowvram_mode: model.model.half().to(device)