diff --git a/api/app.py b/api/app.py
index deef9ee..54800c2 100644
--- a/api/app.py
+++ b/api/app.py
@@ -20,6 +20,7 @@
 from download import download_model, normalize_model_id
 import traceback
 from precision import MODEL_REVISION, MODEL_PRECISION
+from device import device, device_id, device_name
 
 RUNTIME_DOWNLOADS = os.getenv("RUNTIME_DOWNLOADS") == "1"
 USE_DREAMBOOTH = os.getenv("USE_DREAMBOOTH") == "1"
@@ -58,7 +59,7 @@ def init():
         "init",
         "start",
         {
-            "device": torch.cuda.get_device_name(),
+            "device": device_name,
             "hostname": os.getenv("HOSTNAME"),
             "model_id": MODEL_ID,
             "diffusers": __version__,
@@ -329,7 +330,7 @@ def inference(all_inputs: dict) -> dict:
     last_xformers_memory_efficient_attention.update({pipeline: x_m_e_a})
 
     # Run the model
-    # with autocast("cuda"):
+    # with autocast(device_id):
     #   image = pipeline(**model_inputs).images[0]
 
     if call_inputs.get("train", None) == "dreambooth":
@@ -357,10 +358,10 @@ def inference(all_inputs: dict) -> dict:
     # Do this after dreambooth as dreambooth accepts a seed int directly.
     seed = model_inputs.get("seed", None)
     if seed == None:
-        generator = torch.Generator(device="cuda")
+        generator = torch.Generator(device=device)
         generator.seed()
     else:
-        generator = torch.Generator(device="cuda").manual_seed(seed)
+        generator = torch.Generator(device=device).manual_seed(seed)
         del model_inputs["seed"]
 
     model_inputs.update({"generator": generator})
@@ -375,7 +376,7 @@ def inference(all_inputs: dict) -> dict:
     # autocast im2img and inpaint which are broken in 0.4.0, 0.4.1
     # still broken in 0.5.1
     elif call_inputs.get("PIPELINE") != "StableDiffusionPipeline":
-        with autocast("cuda"):
+        with autocast(device_id):
             images = pipeline(**model_inputs).images
     else:
         images = pipeline(**model_inputs).images
@@ -403,6 +404,7 @@ def inference(all_inputs: dict) -> dict:
     else:
         result = result | {"image_base64": images_base64[0]}
 
+    # TODO, move and generalize in device.py
     mem_usage = 0
     if torch.cuda.is_available():
         mem_usage = torch.cuda.memory_allocated() / torch.cuda.max_memory_allocated()
diff --git a/api/convert_to_diffusers.py b/api/convert_to_diffusers.py
index b4bf5ab..10231fa 100644
--- a/api/convert_to_diffusers.py
+++ b/api/convert_to_diffusers.py
@@ -3,6 +3,7 @@
 import subprocess
 import torch
 from utils import Storage
+from device import device_id
 
 MODEL_ID = os.environ.get("MODEL_ID", None)
 CHECKPOINT_DIR = "/root/.cache/checkpoints"
@@ -36,17 +37,13 @@ def main(model_id: str, checkpoint_url: str, checkpoint_config_url: str):
         "./diffusers/scripts/convert_original_stable_diffusion_to_diffusers.py"
     )
 
-    gpu = False
-    if torch.cuda.is_available():
-        gpu = True
-
     print("Converting " + fname + " to diffusers model " + model_id + "...", flush=True)
     subprocess.run(
         ["pip", "install", "omegaconf", "pytorch_lightning", "tensorboard"], check=True
     )
     subprocess.run(["apt-get", "install", "-y", "wget"], check=True)
 
-    if not gpu:
+    if device_id == "cpu":
         subprocess.run(
             [
                 "sed",
diff --git a/api/device.py b/api/device.py
new file mode 100644
index 0000000..c83eaeb
--- /dev/null
+++ b/api/device.py
@@ -0,0 +1,32 @@
+import torch
+
+if torch.cuda.is_available():
+    print("[device] CUDA (Nvidia) detected")
+    device_id = "cuda"
+    device_name = torch.cuda.get_device_name()
+elif torch.backends.mps.is_available():
+    print("[device] MPS (MacOS Metal, Apple M1, etc) detected")
+    device_id = "mps"
+    device_name = "MPS"
+else:
+    print("[device] CPU only - no GPU detected")
+    device_id = "cpu"
+    device_name = "CPU only"
+
+    if not torch.backends.cuda.is_built():
+        print(
+            "CUDA not available because the current PyTorch install was not "
+            "built with CUDA enabled."
+        )
+    if torch.backends.mps.is_built():
+        print(
+            "MPS not available because the current MacOS version is not 12.3+ "
+            "and/or you do not have an MPS-enabled device on this machine."
+        )
+    else:
+        print(
+            "MPS not available because the current PyTorch install was not "
+            "built with MPS enabled."
+        )
+
+device = torch.device(device_id)
diff --git a/api/download.py b/api/download.py
index c8cefd1..561ad41 100644
--- a/api/download.py
+++ b/api/download.py
@@ -101,7 +101,7 @@ def download_model(
                 "model_revision": model_revision,
             }
         )
-        # This would be quicker to just model.to("cuda") afterwards, but
+        # This would be quicker to just model.to(device) afterwards, but
         # this conveniently logs all the timings (and doesn't happen often)
         print("download")
         send("download", "start", {})
diff --git a/api/loadModel.py b/api/loadModel.py
index 8290e62..0bac36e 100644
--- a/api/loadModel.py
+++ b/api/loadModel.py
@@ -3,6 +3,7 @@
 from diffusers import pipelines as _pipelines, StableDiffusionPipeline
 from getScheduler import getScheduler, DEFAULT_SCHEDULER
 from precision import torch_dtype_from_precision
+from device import device
 import time
 
 HF_AUTH_TOKEN = os.getenv("HF_AUTH_TOKEN")
@@ -70,7 +71,7 @@ def loadModel(model_id: str, load=True, precision=None, revision=None):
 
     if load:
         to_gpu = time.time()
-        model.to("cuda")
+        model.to(device)
         to_gpu = round((time.time() - to_gpu) * 1000)
         print(f"Loaded from disk in {from_pretrained} ms, to gpu in {to_gpu} ms")
     else: